From 33686ea851414e0d796690c7c97c453f642d4b3f Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 15 Jan 2025 17:35:42 +0000 Subject: [PATCH 1/8] Update versions for GH Actions and Geckodriver. --- .github/workflows/docker-publish.yaml | 14 +++++++------- .github/workflows/python-publish.yaml | 17 +++++------------ Dockerfile | 2 +- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/.github/workflows/docker-publish.yaml b/.github/workflows/docker-publish.yaml index 379aaaa..4d232e2 100644 --- a/.github/workflows/docker-publish.yaml +++ b/.github/workflows/docker-publish.yaml @@ -25,22 +25,22 @@ jobs: uses: actions/checkout@v3 - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3 # https://github.com/docker/setup-buildx-action - + - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v1 - + uses: docker/setup-buildx-action@v3 + - name: Log in to Docker Hub - uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - + - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + uses: docker/metadata-action@369eb591f429131d6889c46b94e711f089e6ca96 with: images: bellingcat/auto-archiver diff --git a/.github/workflows/python-publish.yaml b/.github/workflows/python-publish.yaml index b63a560..5badd29 100644 --- a/.github/workflows/python-publish.yaml +++ b/.github/workflows/python-publish.yaml @@ -22,27 +22,20 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@v3 - - - name: Extract Python Version from pyproject.toml - id: python-version - run: | - version=$(grep 'python =' pyproject.toml | awk -F'"' '{print $2}' | tr -d '^~<=>') - echo "python-version=$version" >> $GITHUB_ENV + uses: actions/checkout@v4 - name: Set up Python - uses: 
actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: ${{ env.python-version }} + python-version-file: pyproject.toml - name: Install Poetry run: | - python -m pip install --upgrade pip - python -m pip install "poetry>=2.0.0,<3.0.0" + pipx install "poetry>=2.0.0,<3.0.0" - name: Install dependencies run: | - poetry install --no-root + poetry install --no-interaction --no-root - name: Build the package run: | diff --git a/Dockerfile b/Dockerfile index 3b1e252..0ecc7f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ RUN add-apt-repository ppa:mozillateam/ppa && \ apt-get install -y --no-install-recommends gcc ffmpeg fonts-noto exiftool && \ apt-get install -y --no-install-recommends firefox-esr && \ ln -s /usr/bin/firefox-esr /usr/bin/firefox && \ - wget https://github.com/mozilla/geckodriver/releases/download/v0.33.0/geckodriver-v0.33.0-linux64.tar.gz && \ + wget https://github.com/mozilla/geckodriver/releases/download/v0.35.0/geckodriver-v0.35.0-linux64.tar.gz && \ tar -xvzf geckodriver* -C /usr/local/bin && \ chmod +x /usr/local/bin/geckodriver && \ rm geckodriver-v* && \ From d3eec5d90fec501d718d48b8c6fbbd6f797d07c5 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 15 Jan 2025 21:45:29 +0000 Subject: [PATCH 2/8] Basic docs structure for RTD --- .readthedocs.yaml | 22 + docs/Makefile | 20 + docs/source/conf.py | 64 ++ docs/source/configurations.rst | 35 + docs/source/developer_guidelines.rst | 6 + docs/source/index.rst | 27 + docs/source/user_guidelines.rst | 11 + poetry.lock | 643 +++++++++++++++++- pyproject.toml | 9 + src/auto_archiver/__main__.py | 1 + src/auto_archiver/archivers/__init__.py | 9 +- src/auto_archiver/archivers/archiver.py | 15 +- .../archivers/instagram_api_archiver.py | 13 +- .../archivers/instagram_archiver.py | 5 + .../archivers/instagram_tbot_archiver.py | 8 + .../archivers/tiktok_archiver.py | 1 + .../archivers/twitter_archiver.py | 2 +- .../archivers/youtubedl_archiver.py | 29 +- 
src/auto_archiver/core/__init__.py | 3 + src/auto_archiver/core/config.py | 6 + src/auto_archiver/core/context.py | 17 +- src/auto_archiver/core/media.py | 19 +- src/auto_archiver/core/metadata.py | 18 +- src/auto_archiver/core/orchestrator.py | 6 + src/auto_archiver/core/step.py | 15 +- src/auto_archiver/databases/__init__.py | 4 + src/auto_archiver/databases/api_db.py | 4 +- src/auto_archiver/enrichers/__init__.py | 12 + src/auto_archiver/enrichers/enricher.py | 2 + src/auto_archiver/enrichers/hash_enricher.py | 9 + .../enrichers/pdq_hash_enricher.py | 12 + .../enrichers/thumbnail_enricher.py | 8 + src/auto_archiver/feeders/__init__.py | 3 + src/auto_archiver/feeders/gsheet_feeder.py | 10 + src/auto_archiver/formatters/__init__.py | 1 + src/auto_archiver/storages/__init__.py | 3 + src/auto_archiver/utils/__init__.py | 1 + src/auto_archiver/utils/webdriver.py | 1 + 38 files changed, 1034 insertions(+), 40 deletions(-) create mode 100644 .readthedocs.yaml create mode 100644 docs/Makefile create mode 100644 docs/source/conf.py create mode 100644 docs/source/configurations.rst create mode 100644 docs/source/developer_guidelines.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/user_guidelines.rst diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..c8c07ac --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,22 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + + +build: + os: ubuntu-22.04 + tools: + python: "3.10" + jobs: + post_install: + - pip install poetry + # https://python-poetry.org/docs/managing-dependencies/#dependency-groups + # VIRTUAL_ENV needs to be set manually for now. 
+ # See https://github.com/readthedocs/readthedocs.org/pull/11152/ + - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --only docs + + +sphinx: + configuration: docs/source/conf.py diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..92dd33a --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..3168b22 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,64 @@ +# Configuration file for the Sphinx documentation builder.
+# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +from importlib.metadata import metadata + +package_metadata = metadata("auto-archiver") +project = package_metadata["name"] +authors = package_metadata["authors"] +release = package_metadata["version"] + + +# -- General configuration --------------------------------------------------- +extensions = [ + "autoapi.extension", # Generate API documentation from docstrings + "myst_parser", # Markdown support + 'sphinxcontrib.mermaid', # Mermaid diagrams + "sphinx.ext.viewcode", # Source code links + "sphinx.ext.napoleon", # Google-style and NumPy-style docstrings + # "sphinx.ext.autodoc", # Include custom docstrings + # 'sphinx.ext.autosummary', # Summarize module/class/function docs +] + +templates_path = ['_templates'] +exclude_patterns = [] + + +# -- AutoAPI Configuration --------------------------------------------------- +autoapi_type = 'python' +autoapi_dirs = ["../../src"] +autodoc_typehints = "signature" # Include type hints in the signature +autoapi_ignore = [] # Ignore specific modules +autoapi_keep_files = True # Option to retain intermediate JSON files for debugging +autoapi_add_toctree_entry = True # Include API docs in the TOC +autoapi_template_dir = None # Use default templates +autoapi_options = [ + "members", + "undoc-members", + "show-inheritance", + "show-module-summary", + "imported-members", +] + + +# -- Markdown Support -------------------------------------------------------- +myst_enable_extensions = [ + "colon_fence", # ::: fences + "deflist", # Definition lists + "html_admonition", # HTML-style admonitions + "html_image", # Inline HTML images + "replacements", # Substitutions like (C) + "smartquotes", # Smart quotes + "linkify", # Auto-detect links + "substitution", # Text substitutions +] +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} + +# -- Options for HTML output 
------------------------------------------------- +html_theme = 'furo' +# html_static_path = ['_static'] + diff --git a/docs/source/configurations.rst b/docs/source/configurations.rst new file mode 100644 index 0000000..3fb482c --- /dev/null +++ b/docs/source/configurations.rst @@ -0,0 +1,35 @@ + +Configurations +============== + +This section of the documentation provides guidelines for configuring the tool. + +File Reference +-------------- + + +Below is the content of the `example.orchestration.yaml` file: + +.. raw:: html + + <details>
+ <summary>View example.orchestration.yaml</summary> + +.. literalinclude:: ../../example.orchestration.yaml + :language: yaml + :caption: example.orchestration.yaml + +.. raw:: html + + </details>
+ + +Configs +------- + +This section of the documentation will show the custom configurations for the individual steps of the tool. + + +.. include:: configs.rst + + diff --git a/docs/source/developer_guidelines.rst b/docs/source/developer_guidelines.rst new file mode 100644 index 0000000..c0fdee0 --- /dev/null +++ b/docs/source/developer_guidelines.rst @@ -0,0 +1,6 @@ + +Developer Guidelines +==================== + +This section of the documentation provides guidelines for developers who want to modify or contribute to the tool. + diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..22b6e1e --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,27 @@ +.. auto-archiver documentation master file, created by + sphinx-quickstart on Sun Jan 12 20:35:50 2025. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Auto Archiver documentation +=========================== + +.. note:: + + This is a work in progress. + + +.. include:: ../../README.md + :parser: myst + + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Contents: + + developer_guidelines + configurations + user_guidelines + configs + diff --git a/docs/source/user_guidelines.rst b/docs/source/user_guidelines.rst new file mode 100644 index 0000000..93fb2f2 --- /dev/null +++ b/docs/source/user_guidelines.rst @@ -0,0 +1,11 @@ + +User Guidelines +=============== + +This section of the documentation provides guidelines for users who want to use the tool, +without needing to modify the code. +To see the developer guidelines, see :doc:`developer_guidelines`. + +.. note:: + + This is a work in progress. diff --git a/poetry.lock b/poetry.lock index 872db7c..ec3c537 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,40 @@ # This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand.
+[[package]] +name = "alabaster" +version = "1.0.0" +description = "A light, configurable Sphinx theme" +optional = false +python-versions = ">=3.10" +groups = ["docs"] +files = [ + {file = "alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b"}, + {file = "alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e"}, +] + +[[package]] +name = "anyio" +version = "4.8.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a"}, + {file = "anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} + +[package.extras] +doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] +trio = ["trio (>=0.26.1)"] + [[package]] name = "asn1crypto" version = "1.5.1" @@ -12,6 +47,21 @@ files = [ {file = "asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c"}, ] +[[package]] +name = "astroid" +version = "3.3.8" +description = "An abstract syntax tree for Python with inference support." 
+optional = false +python-versions = ">=3.9.0" +groups = ["docs"] +files = [ + {file = "astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c"}, + {file = "astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} + [[package]] name = "attrs" version = "24.3.0" @@ -63,13 +113,28 @@ files = [ pycodestyle = ">=2.12.0" tomli = {version = "*", markers = "python_version < \"3.11\""} +[[package]] +name = "babel" +version = "2.16.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +groups = ["docs"] +files = [ + {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"}, + {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"}, +] + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] + [[package]] name = "beautifulsoup4" version = "4.12.3" description = "Screen-scraping library" optional = false python-versions = ">=3.6.0" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, @@ -87,18 +152,18 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.35.98" +version = "1.35.99" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.35.98-py3-none-any.whl", hash = "sha256:d0224e1499d7189b47aa7f469d96522d98df6f5702fccb20a95a436582ebcd9d"}, - {file = "boto3-1.35.98.tar.gz", hash = 
"sha256:4b6274b4fe9d7113f978abea66a1f20c8a397c268c9d1b2a6c96b14a256da4a5"}, + {file = "boto3-1.35.99-py3-none-any.whl", hash = "sha256:83e560faaec38a956dfb3d62e05e1703ee50432b45b788c09e25107c5058bd71"}, + {file = "boto3-1.35.99.tar.gz", hash = "sha256:e0abd794a7a591d90558e92e29a9f8837d25ece8e3c120e530526fe27eba5fca"}, ] [package.dependencies] -botocore = ">=1.35.98,<1.36.0" +botocore = ">=1.35.99,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -107,14 +172,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.98" +version = "1.35.99" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.35.98-py3-none-any.whl", hash = "sha256:4f1c0b687488663a774ad3a5e81a5f94fae1bcada2364cfdc48482c4dbf794d5"}, - {file = "botocore-1.35.98.tar.gz", hash = "sha256:d11742b3824bdeac3c89eeeaf5132351af41823bbcef8fc15e95c8250b1de09c"}, + {file = "botocore-1.35.99-py3-none-any.whl", hash = "sha256:b22d27b6b617fc2d7342090d6129000af2efd20174215948c0d7ae2da0fab445"}, + {file = "botocore-1.35.99.tar.gz", hash = "sha256:1eab44e969c39c5f3d9a3104a0836c24715579a455f12b3979a31d7cde51b3c3"}, ] [package.dependencies] @@ -335,7 +400,7 @@ version = "2024.12.14" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, @@ -443,7 +508,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -539,13 +604,28 @@ files = [ {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, ] +[[package]] +name = "click" +version = "8.1.8" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +groups = ["docs"] +files = [ + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main", "dev"] +groups = ["main", "dev", "docs"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -637,13 +717,25 @@ calendars = ["convertdate", "hijri-converter"] fasttext = ["fasttext"] langdetect = ["langdetect"] +[[package]] +name = "docutils" +version = "0.21.2" +description = "Docutils -- Python Documentation Utilities" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"}, + {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] +groups = ["main", "dev", "docs"] markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, @@ -671,6 +763,24 @@ future = "*" [package.extras] dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4.6.1)", "pytest-mock (==1.10.4)", "tox (==3.12.1)"] +[[package]] +name = "furo" +version = "2024.8.6" +description = "A clean customisable Sphinx documentation theme." 
+optional = false +python-versions = ">=3.8" +groups = ["docs"] +files = [ + {file = "furo-2024.8.6-py3-none-any.whl", hash = "sha256:6cd97c58b47813d3619e63e9081169880fbe331f0ca883c871ff1f3f11814f5c"}, + {file = "furo-2024.8.6.tar.gz", hash = "sha256:b63e4cee8abfc3136d3bc03a3d45a76a850bada4d6374d24c1716b0e01394a01"}, +] + +[package.dependencies] +beautifulsoup4 = "*" +pygments = ">=2.7" +sphinx = ">=6.0,<9.0" +sphinx-basic-ng = ">=1.0.0.beta2" + [[package]] name = "future" version = "1.0.0" @@ -827,7 +937,7 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -854,7 +964,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -863,6 +973,18 @@ files = [ [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +[[package]] +name = "imagesize" +version = "1.4.1" +description = "Getting image size from png/jpeg/jpeg2000/gif file" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["docs"] +files = [ + {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, + {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, 
+] + [[package]] name = "iniconfig" version = "2.0.0" @@ -898,7 +1020,7 @@ version = "3.1.5" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, @@ -956,13 +1078,38 @@ win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov (==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
+optional = false +python-versions = ">=3.8" +groups = ["docs"] +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "markupsafe" version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1047,6 +1194,38 @@ dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"] docs = ["autodocsumm (==0.2.14)", "furo (==2024.8.6)", "sphinx (==8.1.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.0)", "sphinxext-opengraph (==0.9.1)"] tests = ["pytest", "simplejson"] +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +description = "Collection of plugins for markdown-it-py" +optional = false +python-versions = ">=3.8" +groups = ["docs"] +files = [ + {file = "mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636"}, + {file = "mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5"}, +] + +[package.dependencies] +markdown-it-py = ">=1.0.0,<4.0.0" + +[package.extras] +code-style = ["pre-commit"] +rtd = ["myst-parser", "sphinx-book-theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +groups = ["docs"] +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "mutagen" version = "1.47.0" @@ -1071,6 +1250,33 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = 
"myst-parser" +version = "4.0.0" +description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser," +optional = false +python-versions = ">=3.10" +groups = ["docs"] +files = [ + {file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"}, + {file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"}, +] + +[package.dependencies] +docutils = ">=0.19,<0.22" +jinja2 = "*" +markdown-it-py = ">=3.0,<4.0" +mdit-py-plugins = ">=0.4.1,<1.0" +pyyaml = "*" +sphinx = ">=7,<9" + +[package.extras] +code-style = ["pre-commit (>=3.0,<4.0)"] +linkify = ["linkify-it-py (>=2.0,<3.0)"] +rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] +testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"] +testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"] + [[package]] name = "numpy" version = "2.1.3" @@ -1208,7 +1414,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] +groups = ["main", "dev", "docs"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -1483,6 +1689,21 @@ files = [ {file = "pycryptodomex-3.21.0.tar.gz", hash = "sha256:222d0bd05381dd25c32dd6065c071ebf084212ab79bab4599ba9e6a3e0009e6c"}, ] +[[package]] +name = "pygments" +version = "2.19.1" +description = 
"Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +groups = ["docs"] +files = [ + {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, + {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pyopenssl" version = "24.2.1" @@ -1633,7 +1854,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -1800,7 +2021,7 @@ version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -1922,12 +2143,24 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." +optional = false +python-versions = "*" +groups = ["docs"] +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + [[package]] name = "sortedcontainers" version = "2.4.0" @@ -1946,12 +2179,266 @@ version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." 
optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, ] +[[package]] +name = "sphinx" +version = "8.1.3" +description = "Python documentation generator" +optional = false +python-versions = ">=3.10" +groups = ["docs"] +files = [ + {file = "sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"}, + {file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"}, +] + +[package.dependencies] +alabaster = ">=0.7.14" +babel = ">=2.13" +colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""} +docutils = ">=0.20,<0.22" +imagesize = ">=1.3" +Jinja2 = ">=3.1" +packaging = ">=23.0" +Pygments = ">=2.17" +requests = ">=2.30.0" +snowballstemmer = ">=2.2" +sphinxcontrib-applehelp = ">=1.0.7" +sphinxcontrib-devhelp = ">=1.0.6" +sphinxcontrib-htmlhelp = ">=2.0.6" +sphinxcontrib-jsmath = ">=1.0.1" +sphinxcontrib-qthelp = ">=1.0.6" +sphinxcontrib-serializinghtml = ">=1.1.9" +tomli = {version = ">=2", markers = "python_version < \"3.11\""} + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"] +test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"] + +[[package]] +name = "sphinx-autoapi" +version = "3.4.0" +description = 
"Sphinx API documentation generator" +optional = false +python-versions = ">=3.8" +groups = ["docs"] +files = [ + {file = "sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92"}, + {file = "sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c"}, +] + +[package.dependencies] +astroid = [ + {version = ">=2.7", markers = "python_version < \"3.12\""}, + {version = ">=3.0.0a1", markers = "python_version >= \"3.12\""}, +] +Jinja2 = "*" +PyYAML = "*" +sphinx = ">=6.1.0" + +[[package]] +name = "sphinx-autobuild" +version = "2024.10.3" +description = "Rebuild Sphinx documentation on changes, with hot reloading in the browser." +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "sphinx_autobuild-2024.10.3-py3-none-any.whl", hash = "sha256:158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa"}, + {file = "sphinx_autobuild-2024.10.3.tar.gz", hash = "sha256:248150f8f333e825107b6d4b86113ab28fa51750e5f9ae63b59dc339be951fb1"}, +] + +[package.dependencies] +colorama = ">=0.4.6" +sphinx = "*" +starlette = ">=0.35" +uvicorn = ">=0.25" +watchfiles = ">=0.20" +websockets = ">=11" + +[package.extras] +test = ["httpx", "pytest (>=6)"] + +[[package]] +name = "sphinx-basic-ng" +version = "1.0.0b2" +description = "A modern skeleton for Sphinx themes." 
+optional = false +python-versions = ">=3.7" +groups = ["docs"] +files = [ + {file = "sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b"}, + {file = "sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9"}, +] + +[package.dependencies] +sphinx = ">=4.0" + +[package.extras] +docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"] + +[[package]] +name = "sphinx-copybutton" +version = "0.5.2" +description = "Add a copy button to each of your code cells." +optional = false +python-versions = ">=3.7" +groups = ["docs"] +files = [ + {file = "sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd"}, + {file = "sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e"}, +] + +[package.dependencies] +sphinx = ">=1.8" + +[package.extras] +code-style = ["pre-commit (==2.12.1)"] +rtd = ["ipython", "myst-nb", "sphinx", "sphinx-book-theme", "sphinx-examples"] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5"}, + {file = "sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" +optional = false +python-versions = ">=3.9" +groups = 
["docs"] +files = [ + {file = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2"}, + {file = "sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8"}, + {file = "sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["html5lib", "pytest"] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +optional = false +python-versions = ">=3.5" +groups = ["docs"] +files = [ + {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, + {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, +] + +[package.extras] +test = ["flake8", "mypy", "pytest"] + +[[package]] +name = "sphinxcontrib-mermaid" +version = "1.0.0" +description = "Mermaid diagrams in yours Sphinx powered docs" +optional = false +python-versions = ">=3.8" +groups = ["docs"] +files = [ + {file = "sphinxcontrib_mermaid-1.0.0-py3-none-any.whl", hash = "sha256:60b72710ea02087f212028feb09711225fbc2e343a10d34822fe787510e1caa3"}, + {file = 
"sphinxcontrib_mermaid-1.0.0.tar.gz", hash = "sha256:2e8ab67d3e1e2816663f9347d026a8dee4a858acdd4ad32dd1c808893db88146"}, +] + +[package.dependencies] +pyyaml = "*" +sphinx = "*" + +[package.extras] +test = ["defusedxml", "myst-parser", "pytest", "ruff", "sphinx"] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb"}, + {file = "sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["defusedxml (>=0.7.1)", "pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"}, + {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "starlette" +version = "0.45.2" +description = "The little ASGI library that shines." 
+optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "starlette-0.45.2-py3-none-any.whl", hash = "sha256:4daec3356fb0cb1e723a5235e5beaf375d2259af27532958e2d79df549dad9da"}, + {file = "starlette-0.45.2.tar.gz", hash = "sha256:bba1831d15ae5212b22feab2f218bab6ed3cd0fc2dc1d4442443bb1ee52260e0"}, +] + +[package.dependencies] +anyio = ">=3.6.2,<5" + +[package.extras] +full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"] + [[package]] name = "telethon" version = "1.38.1" @@ -1989,7 +2476,7 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["dev", "docs"] markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, @@ -2112,7 +2599,7 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -2183,7 +2670,7 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, @@ -2198,6 +2685,26 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "uvicorn" +version = "0.34.0" +description = "The lightning-fast ASGI server." +optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "uvicorn-0.34.0-py3-none-any.whl", hash = "sha256:023dc038422502fa28a09c7a30bf2b6991512da7dcdb8fd35fe57cfc154126f4"}, + {file = "uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + [[package]] name = "vk-api" version = "11.9.9" @@ -2264,6 +2771,90 @@ six = "*" all = ["brotlipy"] testing = ["hookdns", "httpbin (>=0.10.2)", "pytest", "pytest-cov", "requests", "urllib3 (>=1.26.5,<1.26.16)", "wsgiprox"] +[[package]] +name = "watchfiles" +version = "1.0.4" +description = "Simple, modern and high performance file watching and code reload in python." 
+optional = false +python-versions = ">=3.9" +groups = ["docs"] +files = [ + {file = "watchfiles-1.0.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ba5bb3073d9db37c64520681dd2650f8bd40902d991e7b4cfaeece3e32561d08"}, + {file = "watchfiles-1.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f25d0ba0fe2b6d2c921cf587b2bf4c451860086534f40c384329fb96e2044d1"}, + {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47eb32ef8c729dbc4f4273baece89398a4d4b5d21a1493efea77a17059f4df8a"}, + {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:076f293100db3b0b634514aa0d294b941daa85fc777f9c698adb1009e5aca0b1"}, + {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1eacd91daeb5158c598fe22d7ce66d60878b6294a86477a4715154990394c9b3"}, + {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13c2ce7b72026cfbca120d652f02c7750f33b4c9395d79c9790b27f014c8a5a2"}, + {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:90192cdc15ab7254caa7765a98132a5a41471cf739513cc9bcf7d2ffcc0ec7b2"}, + {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278aaa395f405972e9f523bd786ed59dfb61e4b827856be46a42130605fd0899"}, + {file = "watchfiles-1.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a462490e75e466edbb9fc4cd679b62187153b3ba804868452ef0577ec958f5ff"}, + {file = "watchfiles-1.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8d0d0630930f5cd5af929040e0778cf676a46775753e442a3f60511f2409f48f"}, + {file = "watchfiles-1.0.4-cp310-cp310-win32.whl", hash = "sha256:cc27a65069bcabac4552f34fd2dce923ce3fcde0721a16e4fb1b466d63ec831f"}, + {file = "watchfiles-1.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:8b1f135238e75d075359cf506b27bf3f4ca12029c47d3e769d8593a2024ce161"}, + {file = 
"watchfiles-1.0.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2a9f93f8439639dc244c4d2902abe35b0279102bca7bbcf119af964f51d53c19"}, + {file = "watchfiles-1.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9eea33ad8c418847dd296e61eb683cae1c63329b6d854aefcd412e12d94ee235"}, + {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31f1a379c9dcbb3f09cf6be1b7e83b67c0e9faabed0471556d9438a4a4e14202"}, + {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab594e75644421ae0a2484554832ca5895f8cab5ab62de30a1a57db460ce06c6"}, + {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc2eb5d14a8e0d5df7b36288979176fbb39672d45184fc4b1c004d7c3ce29317"}, + {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f68d8e9d5a321163ddacebe97091000955a1b74cd43724e346056030b0bacee"}, + {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9ce064e81fe79faa925ff03b9f4c1a98b0bbb4a1b8c1b015afa93030cb21a49"}, + {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b77d5622ac5cc91d21ae9c2b284b5d5c51085a0bdb7b518dba263d0af006132c"}, + {file = "watchfiles-1.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1941b4e39de9b38b868a69b911df5e89dc43767feeda667b40ae032522b9b5f1"}, + {file = "watchfiles-1.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4f8c4998506241dedf59613082d1c18b836e26ef2a4caecad0ec41e2a15e4226"}, + {file = "watchfiles-1.0.4-cp311-cp311-win32.whl", hash = "sha256:4ebbeca9360c830766b9f0df3640b791be569d988f4be6c06d6fae41f187f105"}, + {file = "watchfiles-1.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:05d341c71f3d7098920f8551d4df47f7b57ac5b8dad56558064c3431bdfc0b74"}, + {file = "watchfiles-1.0.4-cp311-cp311-win_arm64.whl", hash = 
"sha256:32b026a6ab64245b584acf4931fe21842374da82372d5c039cba6bf99ef722f3"}, + {file = "watchfiles-1.0.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:229e6ec880eca20e0ba2f7e2249c85bae1999d330161f45c78d160832e026ee2"}, + {file = "watchfiles-1.0.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5717021b199e8353782dce03bd8a8f64438832b84e2885c4a645f9723bf656d9"}, + {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0799ae68dfa95136dde7c472525700bd48777875a4abb2ee454e3ab18e9fc712"}, + {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43b168bba889886b62edb0397cab5b6490ffb656ee2fcb22dec8bfeb371a9e12"}, + {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb2c46e275fbb9f0c92e7654b231543c7bbfa1df07cdc4b99fa73bedfde5c844"}, + {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:857f5fc3aa027ff5e57047da93f96e908a35fe602d24f5e5d8ce64bf1f2fc733"}, + {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55ccfd27c497b228581e2838d4386301227fc0cb47f5a12923ec2fe4f97b95af"}, + {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c11ea22304d17d4385067588123658e9f23159225a27b983f343fcffc3e796a"}, + {file = "watchfiles-1.0.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:74cb3ca19a740be4caa18f238298b9d472c850f7b2ed89f396c00a4c97e2d9ff"}, + {file = "watchfiles-1.0.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c7cce76c138a91e720d1df54014a047e680b652336e1b73b8e3ff3158e05061e"}, + {file = "watchfiles-1.0.4-cp312-cp312-win32.whl", hash = "sha256:b045c800d55bc7e2cadd47f45a97c7b29f70f08a7c2fa13241905010a5493f94"}, + {file = "watchfiles-1.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:c2acfa49dd0ad0bf2a9c0bb9a985af02e89345a7189be1efc6baa085e0f72d7c"}, + {file = 
"watchfiles-1.0.4-cp312-cp312-win_arm64.whl", hash = "sha256:22bb55a7c9e564e763ea06c7acea24fc5d2ee5dfc5dafc5cfbedfe58505e9f90"}, + {file = "watchfiles-1.0.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:8012bd820c380c3d3db8435e8cf7592260257b378b649154a7948a663b5f84e9"}, + {file = "watchfiles-1.0.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa216f87594f951c17511efe5912808dfcc4befa464ab17c98d387830ce07b60"}, + {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c9953cf85529c05b24705639ffa390f78c26449e15ec34d5339e8108c7c407"}, + {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7cf684aa9bba4cd95ecb62c822a56de54e3ae0598c1a7f2065d51e24637a3c5d"}, + {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f44a39aee3cbb9b825285ff979ab887a25c5d336e5ec3574f1506a4671556a8d"}, + {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38320582736922be8c865d46520c043bff350956dfc9fbaee3b2df4e1740a4b"}, + {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39f4914548b818540ef21fd22447a63e7be6e24b43a70f7642d21f1e73371590"}, + {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f12969a3765909cf5dc1e50b2436eb2c0e676a3c75773ab8cc3aa6175c16e902"}, + {file = "watchfiles-1.0.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:0986902677a1a5e6212d0c49b319aad9cc48da4bd967f86a11bde96ad9676ca1"}, + {file = "watchfiles-1.0.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:308ac265c56f936636e3b0e3f59e059a40003c655228c131e1ad439957592303"}, + {file = "watchfiles-1.0.4-cp313-cp313-win32.whl", hash = "sha256:aee397456a29b492c20fda2d8961e1ffb266223625346ace14e4b6d861ba9c80"}, + {file = "watchfiles-1.0.4-cp313-cp313-win_amd64.whl", hash = 
"sha256:d6097538b0ae5c1b88c3b55afa245a66793a8fec7ada6755322e465fb1a0e8cc"}, + {file = "watchfiles-1.0.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:d3452c1ec703aa1c61e15dfe9d482543e4145e7c45a6b8566978fbb044265a21"}, + {file = "watchfiles-1.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7b75fee5a16826cf5c46fe1c63116e4a156924d668c38b013e6276f2582230f0"}, + {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e997802d78cdb02623b5941830ab06f8860038faf344f0d288d325cc9c5d2ff"}, + {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0611d244ce94d83f5b9aff441ad196c6e21b55f77f3c47608dcf651efe54c4a"}, + {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9745a4210b59e218ce64c91deb599ae8775c8a9da4e95fb2ee6fe745fc87d01a"}, + {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4810ea2ae622add560f4aa50c92fef975e475f7ac4900ce5ff5547b2434642d8"}, + {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:740d103cd01458f22462dedeb5a3382b7f2c57d07ff033fbc9465919e5e1d0f3"}, + {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdbd912a61543a36aef85e34f212e5d2486e7c53ebfdb70d1e0b060cc50dd0bf"}, + {file = "watchfiles-1.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0bc80d91ddaf95f70258cf78c471246846c1986bcc5fd33ccc4a1a67fcb40f9a"}, + {file = "watchfiles-1.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ab0311bb2ffcd9f74b6c9de2dda1612c13c84b996d032cd74799adb656af4e8b"}, + {file = "watchfiles-1.0.4-cp39-cp39-win32.whl", hash = "sha256:02a526ee5b5a09e8168314c905fc545c9bc46509896ed282aeb5a8ba9bd6ca27"}, + {file = "watchfiles-1.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:a5ae5706058b27c74bac987d615105da17724172d5aaacc6c362a40599b6de43"}, + {file = 
"watchfiles-1.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdcc92daeae268de1acf5b7befcd6cfffd9a047098199056c72e4623f531de18"}, + {file = "watchfiles-1.0.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8d3d9203705b5797f0af7e7e5baa17c8588030aaadb7f6a86107b7247303817"}, + {file = "watchfiles-1.0.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdef5a1be32d0b07dcea3318a0be95d42c98ece24177820226b56276e06b63b0"}, + {file = "watchfiles-1.0.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:342622287b5604ddf0ed2d085f3a589099c9ae8b7331df3ae9845571586c4f3d"}, + {file = "watchfiles-1.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9fe37a2de80aa785d340f2980276b17ef697ab8db6019b07ee4fd28a8359d2f3"}, + {file = "watchfiles-1.0.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9d1ef56b56ed7e8f312c934436dea93bfa3e7368adfcf3df4c0da6d4de959a1e"}, + {file = "watchfiles-1.0.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b42cac65beae3a362629950c444077d1b44f1790ea2772beaea95451c086bb"}, + {file = "watchfiles-1.0.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e0227b8ed9074c6172cf55d85b5670199c99ab11fd27d2c473aa30aec67ee42"}, + {file = "watchfiles-1.0.4.tar.gz", hash = "sha256:6ba473efd11062d73e4f00c2b730255f9c1bdd73cd5f9fe5b5da8dbd4a717205"}, +] + +[package.dependencies] +anyio = ">=3.0.0" + [[package]] name = "websocket-client" version = "1.8.0" @@ -2287,7 +2878,7 @@ version = "14.1" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "docs"] files = [ {file = "websockets-14.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a0adf84bc2e7c86e8a202537b4fd50e6f7f0e4a6b6bf64d7ccb96c4cd3330b29"}, {file = "websockets-14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:90b5d9dfbb6d07a84ed3e696012610b6da074d97453bd01e0e30744b472c8179"}, @@ -2426,4 +3017,4 @@ test = ["pytest (>=8.1,<9.0)"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "f54b6ca7fd58aae2733ac819f3c9f9f52ea1cc06cb56330a695066bd6171c0f9" +content-hash = "26a6b3bd13262d1a23c8e9f8d99a961ff503b21b0ce1ec0fd76591dcca45868c" diff --git a/pyproject.toml b/pyproject.toml index 1b17c9a..c3a8519 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,15 @@ dependencies = [ pytest = "^8.3.4" autopep8 = "^2.3.1" +[tool.poetry.group.docs.dependencies] +sphinx = "^8.1.3" +sphinx-autoapi = "^3.4.0" +sphinxcontrib-mermaid = "^1.0.0" +sphinx-autobuild = "^2024.10.3" +sphinx-copybutton = "^0.5.2" +myst-parser = "^4.0.0" +furo = "^2024.8.6" + [project.scripts] auto-archiver = "auto_archiver.__main__:main" diff --git a/src/auto_archiver/__main__.py b/src/auto_archiver/__main__.py index b33bc14..1254ec4 100644 --- a/src/auto_archiver/__main__.py +++ b/src/auto_archiver/__main__.py @@ -1,3 +1,4 @@ +""" Entry point for the auto_archiver package. """ from . import Config from . import ArchivingOrchestrator diff --git a/src/auto_archiver/archivers/__init__.py b/src/auto_archiver/archivers/__init__.py index 996ca3b..1a15700 100644 --- a/src/auto_archiver/archivers/__init__.py +++ b/src/auto_archiver/archivers/__init__.py @@ -1,3 +1,10 @@ +""" +Archivers are responsible for retrieving the content from various external platforms. +They act as specialized modules, each tailored to interact with a specific platform, +service, or data source. The archivers collectively enable the tool to comprehensively +collect and preserve a variety of content types, such as posts, images, videos and metadata. 
+ +""" from .archiver import Archiver from .telethon_archiver import TelethonArchiver from .twitter_archiver import TwitterArchiver @@ -9,4 +16,4 @@ from .telegram_archiver import TelegramArchiver from .vk_archiver import VkArchiver from .youtubedl_archiver import YoutubeDLArchiver from .instagram_api_archiver import InstagramAPIArchiver -from .bluesky_archiver import BlueskyArchiver \ No newline at end of file +from .bluesky_archiver import BlueskyArchiver diff --git a/src/auto_archiver/archivers/archiver.py b/src/auto_archiver/archivers/archiver.py index 25e08c3..9f82a80 100644 --- a/src/auto_archiver/archivers/archiver.py +++ b/src/auto_archiver/archivers/archiver.py @@ -1,3 +1,10 @@ +""" The `archiver` module defines the base functionality for implementing archivers in the media archiving framework. + The `Archiver` base class provides common utility methods and a standard interface for archivers. + + It also includes a factory method to initialize an archiver instance based on its name. + + +""" from __future__ import annotations from abc import abstractmethod from dataclasses import dataclass @@ -11,6 +18,11 @@ from ..core import Metadata, Step, ArchivingContext @dataclass class Archiver(Step): + """ + Base class for implementing archivers in the media archiving framework. + Subclasses must implement the `download` method to define platform-specific behavior.
+ """ + name = "archiver" def __init__(self, config: dict) -> None: @@ -66,4 +78,5 @@ class Archiver(Step): return to_filename @abstractmethod - def download(self, item: Metadata) -> Metadata: pass + def download(self, item: Metadata) -> Metadata: + pass diff --git a/src/auto_archiver/archivers/instagram_api_archiver.py b/src/auto_archiver/archivers/instagram_api_archiver.py index d8acfd2..d0e7e87 100644 --- a/src/auto_archiver/archivers/instagram_api_archiver.py +++ b/src/auto_archiver/archivers/instagram_api_archiver.py @@ -1,4 +1,15 @@ -import re, requests +""" +The `instagram_api_archiver` module provides tools for archiving various types of Instagram content +using the [Instagrapi API](https://github.com/subzeroid/instagrapi). + +Connects to an Instagrapi API deployment and allows for downloading Instagram user profiles, +posts, stories, highlights, and tagged content. It offers advanced configuration options for filtering +data, reducing JSON output size, and handling large profiles. + +""" + +import re +import requests from datetime import datetime from loguru import logger from retrying import retry diff --git a/src/auto_archiver/archivers/instagram_archiver.py b/src/auto_archiver/archivers/instagram_archiver.py index 97dd172..94a8fc0 100644 --- a/src/auto_archiver/archivers/instagram_archiver.py +++ b/src/auto_archiver/archivers/instagram_archiver.py @@ -1,3 +1,8 @@ +""" Uses the Instaloader library to download content from Instagram. This class handles both individual posts + and user profiles, downloading as much information as possible, including images, videos, text, stories, + highlights, and tagged posts. Authentication is required via username/password or a session file. 
+ +""" import re, os, shutil, traceback import instaloader # https://instaloader.github.io/as-module.html from loguru import logger diff --git a/src/auto_archiver/archivers/instagram_tbot_archiver.py b/src/auto_archiver/archivers/instagram_tbot_archiver.py index 0acc08b..01b1614 100644 --- a/src/auto_archiver/archivers/instagram_tbot_archiver.py +++ b/src/auto_archiver/archivers/instagram_tbot_archiver.py @@ -1,3 +1,11 @@ +""" +InstagramTbotArchiver Module + +This module provides functionality to archive Instagram content (posts, stories, etc.) using a Telegram bot (`instagram_load_bot`). +It interacts with the Telegram API via the Telethon library to send Instagram URLs to the bot, which retrieves the +relevant media and metadata. The fetched content is saved as `Media` objects in a temporary directory and returned as a +`Metadata` object. +""" import shutil from telethon.sync import TelegramClient diff --git a/src/auto_archiver/archivers/tiktok_archiver.py b/src/auto_archiver/archivers/tiktok_archiver.py index fac67d1..3232032 100644 --- a/src/auto_archiver/archivers/tiktok_archiver.py +++ b/src/auto_archiver/archivers/tiktok_archiver.py @@ -1,4 +1,5 @@ import json, os, traceback +import tiktok_downloader from loguru import logger diff --git a/src/auto_archiver/archivers/twitter_archiver.py b/src/auto_archiver/archivers/twitter_archiver.py index 995910b..2303302 100644 --- a/src/auto_archiver/archivers/twitter_archiver.py +++ b/src/auto_archiver/archivers/twitter_archiver.py @@ -1,4 +1,4 @@ -import re, requests, mimetypes, json, math +import re, requests, mimetypes, json from typing import Union from datetime import datetime from loguru import logger diff --git a/src/auto_archiver/archivers/youtubedl_archiver.py b/src/auto_archiver/archivers/youtubedl_archiver.py index b13cceb..a8fcfd9 100644 --- a/src/auto_archiver/archivers/youtubedl_archiver.py +++ b/src/auto_archiver/archivers/youtubedl_archiver.py @@ -1,4 +1,23 @@ -import datetime, os, yt_dlp, pysubs2 +""" 
+This defines an archiver implementation using `yt-dlp`. + +This module is responsible for downloading and processing media content from platforms +supported by `yt-dlp`, such as YouTube, Facebook, and others. It provides functionality +for retrieving videos, subtitles, comments, and other metadata, and it integrates with +the broader archiving framework. + +### Features +- Supports downloading videos and playlists. +- Retrieves metadata like titles, descriptions, upload dates, and durations. +- Downloads subtitles and comments when enabled. +- Configurable options for handling live streams, proxies, and more. + +""" +import datetime +import os +import pysubs2 +import yt_dlp + from loguru import logger from . import Archiver @@ -37,6 +56,7 @@ class YoutubeDLArchiver(Archiver): def download(self, item: Metadata) -> Metadata: url = item.get_url() + # Handle Facebook cookies if enabled if item.netloc in ['facebook.com', 'www.facebook.com'] and self.facebook_cookie: logger.debug('Using Facebook cookie') yt_dlp.utils.std_headers['cookie'] = self.facebook_cookie @@ -66,11 +86,12 @@ class YoutubeDLArchiver(Archiver): logger.debug(f'ytdlp exception which is normal for example a facebook page with images only will cause a IndexError: list index out of range. Exception is: \n {e}') return False - # this time download + # This time download the content ydl = yt_dlp.YoutubeDL({**ydl_options, "getcomments": self.comments}) #TODO: for playlist or long lists of videos, how to download one at a time so they can be stored before the next one is downloaded? 
info = ydl.extract_info(url, download=True) + # Process entries (e.g., for playlists) if "entries" in info: entries = info.get("entries", []) if not len(entries): @@ -78,9 +99,11 @@ class YoutubeDLArchiver(Archiver): return False else: entries = [info] + # Prepare enriched metadata result = Metadata() result.set_title(info.get("title")) if "description" in info: result.set_content(info["description"]) + # Process individual entries for entry in entries: try: filename = ydl.prepare_filename(entry) @@ -112,6 +135,7 @@ class YoutubeDLArchiver(Archiver): "timestamp": datetime.datetime.fromtimestamp(c.get("timestamp"), tz = datetime.timezone.utc) } for c in info.get("comments", [])]) + # Set additional metadata if (timestamp := info.get("timestamp")): #TODO: fix deprecated timestamp, timestamp = datetime.datetime.fromtimestamp(timestamp, tz = datetime.timezone.utc).isoformat() @@ -120,6 +144,7 @@ class YoutubeDLArchiver(Archiver): upload_date = datetime.datetime.strptime(upload_date, '%Y%m%d').replace(tzinfo=datetime.timezone.utc) result.set("upload_date", upload_date) + # Update status for success if self.end_means_success: result.success("yt-dlp") else: result.status = "yt-dlp" return result diff --git a/src/auto_archiver/core/__init__.py b/src/auto_archiver/core/__init__.py index 99765c7..b78df83 100644 --- a/src/auto_archiver/core/__init__.py +++ b/src/auto_archiver/core/__init__.py @@ -1,3 +1,6 @@ +""" Core modules to handle things such as orchestration, metadata and configs. + +""" from .metadata import Metadata from .media import Media from .step import Step diff --git a/src/auto_archiver/core/config.py b/src/auto_archiver/core/config.py index 9bce88f..ec4a512 100644 --- a/src/auto_archiver/core/config.py +++ b/src/auto_archiver/core/config.py @@ -1,4 +1,9 @@ +""" +The Config class initializes and parses configurations for all other steps. +It supports CLI argument parsing, loading from a YAML file, and overrides to allow +flexible setup in various environments.
+""" import argparse, yaml from dataclasses import dataclass, field @@ -55,6 +60,7 @@ class Config: parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml') + # Iterate over all step subclasses to gather default configs and CLI arguments for configurable in self.configurable_parents: child: Step for child in configurable.__subclasses__(): diff --git a/src/auto_archiver/core/context.py b/src/auto_archiver/core/context.py index 8fdf040..9a21b5c 100644 --- a/src/auto_archiver/core/context.py +++ b/src/auto_archiver/core/context.py @@ -1,6 +1,21 @@ +""" ArchivingContext provides a global context for managing configurations and temporary data during the archiving process. + +This singleton class allows for: +- Storing and retrieving key-value pairs that are accessible throughout the application lifecycle. +- Marking certain values to persist across resets using `keep_on_reset`. +- Managing temporary directories and other shared data used during the archiving process. + +### Key Features: +- Creates a single global instance. +- Reset functionality allows for clearing configurations, with options for partial or full resets. +- Custom getters and setters for commonly used context values like temporary directories. + +""" + class ArchivingContext: """ - Singleton context class. + Singleton context class for managing global configurations and temporary data. + ArchivingContext._get_instance() to retrieve it if needed otherwise just ArchivingContext.set(key, value) diff --git a/src/auto_archiver/core/media.py b/src/auto_archiver/core/media.py index bf9580e..d204a6e 100644 --- a/src/auto_archiver/core/media.py +++ b/src/auto_archiver/core/media.py @@ -1,3 +1,7 @@ +""" +Manages media files and their associated metadata, supporting storage, +nested media retrieval, and type validation. 
+""" from __future__ import annotations import os @@ -18,6 +22,16 @@ from loguru import logger @dataclass_json # annotation order matters @dataclass class Media: + """ + Represents a media file with associated properties and storage details. + + Attributes: + - filename: The file path of the media. + - key: An optional identifier for the media. + - urls: A list of URLs where the media is stored or accessible. + - properties: Additional metadata or transformations for the media. + - _mimetype: The media's mimetype (e.g., image/jpeg, video/mp4). + """ filename: str key: str = None urls: List[str] = field(default_factory=list) @@ -40,8 +54,9 @@ class Media: s.store(any_media, url, metadata=metadata) def all_inner_media(self, include_self=False): - """ Media can be inside media properties, examples include transformations on original media. - This function returns a generator for all the inner media. + """Retrieves all media, including nested media within properties or transformations on original media. + This function returns a generator for all the inner media. + """ if include_self: yield self for prop in self.properties.values(): diff --git a/src/auto_archiver/core/metadata.py b/src/auto_archiver/core/metadata.py index 0a2ad07..04683dd 100644 --- a/src/auto_archiver/core/metadata.py +++ b/src/auto_archiver/core/metadata.py @@ -1,3 +1,13 @@ +""" +Acts as a container for metadata and media objects associated with an archived item. + +Key Functionalities: +- Store and retrieve metadata and associated media. +- Merge metadata objects with conflict resolution. +- Validate properties like URLs and timestamps. +- Manage and deduplicate media objects. +- Support for flexible metadata querying and appending. 
+""" from __future__ import annotations import hashlib @@ -25,7 +35,11 @@ class Metadata: def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata: """ - merges two Metadata instances, will overwrite according to overwrite_left flag + Merges another `Metadata` instance into this one. + + Conflicts are resolved based on the `overwrite_left` flag: + - If `True`, this instance's values are overwritten by `right`. + - If `False`, the inverse applies. """ if not right: return self if overwrite_left: @@ -191,4 +205,4 @@ class Metadata: for r in results[1:]: if len(r.media) > len(most_complete.media): most_complete = r elif len(r.media) == len(most_complete.media) and len(r.metadata) > len(most_complete.metadata): most_complete = r - return most_complete \ No newline at end of file + return most_complete diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index 0594dde..3290070 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -1,3 +1,9 @@ +""" Orchestrates all archiving steps, including feeding items, + archiving them with specific archivers, enrichment, storage, + formatting, database operations and clean up. + +""" + from __future__ import annotations from typing import Generator, Union, List from urllib.parse import urlparse diff --git a/src/auto_archiver/core/step.py b/src/auto_archiver/core/step.py index 3917a89..9f294fe 100644 --- a/src/auto_archiver/core/step.py +++ b/src/auto_archiver/core/step.py @@ -1,3 +1,9 @@ +""" +Defines the Step abstract base class, which acts as a blueprint for steps in the archiving pipeline +by handling user configuration, validating the steps properties, and implementing dynamic instantiation. 
+ +""" + from __future__ import annotations from dataclasses import dataclass from inspect import ClassFoundException @@ -10,6 +16,7 @@ class Step(ABC): name: str = None def __init__(self, config: dict) -> None: + # Initialises each step by reading the relevant entries # reads the configs into object properties # self.config = config[self.name] for k, v in config.get(self.name, {}).items(): @@ -20,7 +27,9 @@ class Step(ABC): def init(name: str, config: dict, child: Type[Step]) -> Step: """ - looks into direct subclasses of child for name and returns such an object + Attempts to instantiate a subclass of the provided `child` type + matching the given `name`. + Raises ClassFoundException if no matching subclass is found. TODO: cannot find subclasses of child.subclasses """ for sub in child.__subclasses__(): @@ -30,7 +39,9 @@ class Step(ABC): def assert_valid_string(self, prop: str) -> None: """ - receives a property name an ensures it exists and is a valid non-empty string, raises an exception if not + Receives a property name and ensures it exists and is a valid non-empty string, + raising an AssertionError if not. + TODO: replace assertions with custom exceptions """ assert hasattr(self, prop), f"property {prop} not found" s = getattr(self, prop) diff --git a/src/auto_archiver/databases/__init__.py b/src/auto_archiver/databases/__init__.py index df48f39..4c73896 100644 --- a/src/auto_archiver/databases/__init__.py +++ b/src/auto_archiver/databases/__init__.py @@ -1,3 +1,7 @@ +""" Databases are used to store the outputs from running the Auto Archiver.
+ + +""" from .database import Database from .gsheet_db import GsheetsDb from .console_db import ConsoleDb diff --git a/src/auto_archiver/databases/api_db.py b/src/auto_archiver/databases/api_db.py index 233e2a9..4304855 100644 --- a/src/auto_archiver/databases/api_db.py +++ b/src/auto_archiver/databases/api_db.py @@ -32,7 +32,9 @@ class AAApiDb(Database): "tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))}, } def fetch(self, item: Metadata) -> Union[Metadata, bool]: - """ query the database for the existence of this item""" + """ query the database for the existence of this item. + Helps avoid re-archiving the same URL multiple times. + """ if not self.allow_rearchive: return params = {"url": item.get_url(), "limit": 15} diff --git a/src/auto_archiver/enrichers/__init__.py b/src/auto_archiver/enrichers/__init__.py index 5681403..64ce248 100644 --- a/src/auto_archiver/enrichers/__init__.py +++ b/src/auto_archiver/enrichers/__init__.py @@ -1,3 +1,15 @@ +""" +Enrichers are modular components that enhance archived content by adding +context, metadata, or additional processing. + +These add additional information to the context, such as screenshots, hashes, and metadata. +They are designed to work within the archiving pipeline, operating on `Metadata` objects after +the archiving step and before storage or formatting. + +Enrichers are optional but highly useful for making the archived data more powerful. + + +""" from .enricher import Enricher from .screenshot_enricher import ScreenshotEnricher from .wayback_enricher import WaybackArchiverEnricher diff --git a/src/auto_archiver/enrichers/enricher.py b/src/auto_archiver/enrichers/enricher.py index 4948d57..f195f23 100644 --- a/src/auto_archiver/enrichers/enricher.py +++ b/src/auto_archiver/enrichers/enricher.py @@ -1,3 +1,5 @@ +""" Base classes and utilities for enrichers in the Auto-Archiver system. 
+""" from __future__ import annotations from dataclasses import dataclass from abc import abstractmethod, ABC diff --git a/src/auto_archiver/enrichers/hash_enricher.py b/src/auto_archiver/enrichers/hash_enricher.py index 1270149..69973b7 100644 --- a/src/auto_archiver/enrichers/hash_enricher.py +++ b/src/auto_archiver/enrichers/hash_enricher.py @@ -1,3 +1,12 @@ +""" Hash Enricher for generating cryptographic hashes of media files. + +The `HashEnricher` calculates cryptographic hashes (e.g., SHA-256, SHA3-512) +for media files stored in `Metadata` objects. These hashes are used for +validating content integrity, ensuring data authenticity, and identifying +exact duplicates. The hash is computed by reading the file's bytes in chunks, +making it suitable for handling large files efficiently. + +""" import hashlib from loguru import logger diff --git a/src/auto_archiver/enrichers/pdq_hash_enricher.py b/src/auto_archiver/enrichers/pdq_hash_enricher.py index ff88bab..36f793d 100644 --- a/src/auto_archiver/enrichers/pdq_hash_enricher.py +++ b/src/auto_archiver/enrichers/pdq_hash_enricher.py @@ -1,3 +1,15 @@ +""" +PDQ Hash Enricher for generating perceptual hashes of media files. + +The `PdqHashEnricher` processes media files (e.g., images) in `Metadata` +objects and calculates perceptual hashes using the PDQ hashing algorithm. +These hashes are designed specifically for images and can be used +for detecting duplicate or near-duplicate visual content. + +This enricher is typically used after thumbnail or screenshot enrichers +to ensure images are available for hashing. + +""" import traceback import pdqhash import numpy as np diff --git a/src/auto_archiver/enrichers/thumbnail_enricher.py b/src/auto_archiver/enrichers/thumbnail_enricher.py index 0ffbe38..5d8bee2 100644 --- a/src/auto_archiver/enrichers/thumbnail_enricher.py +++ b/src/auto_archiver/enrichers/thumbnail_enricher.py @@ -1,3 +1,11 @@ +"""Thumbnail Enricher for generating visual previews of video files. 
+ +The `ThumbnailEnricher` processes video files in `Metadata` objects and +creates evenly distributed thumbnail images. These thumbnails provide +visual snapshots of the video's keyframes, helping users preview content +and identify important moments without watching the entire video. + +""" import ffmpeg, os from loguru import logger diff --git a/src/auto_archiver/feeders/__init__.py b/src/auto_archiver/feeders/__init__.py index 84a8495..8117672 100644 --- a/src/auto_archiver/feeders/__init__.py +++ b/src/auto_archiver/feeders/__init__.py @@ -1,3 +1,6 @@ +""" Feeders handle the input of media into the Auto Archiver. + +""" from.feeder import Feeder from .gsheet_feeder import GsheetsFeeder from .cli_feeder import CLIFeeder diff --git a/src/auto_archiver/feeders/gsheet_feeder.py b/src/auto_archiver/feeders/gsheet_feeder.py index 19142f4..1c4fc32 100644 --- a/src/auto_archiver/feeders/gsheet_feeder.py +++ b/src/auto_archiver/feeders/gsheet_feeder.py @@ -1,3 +1,13 @@ +""" +GsheetsFeeder: A Google Sheets-based feeder for the Auto Archiver. + +This reads data from Google Sheets and filters rows based on user-defined rules. +The filtered rows are processed into `Metadata` objects. + +### Key properties +- validates the sheet's structure and filters rows based on input configurations. +- Ensures only rows with valid URLs and unprocessed statuses are included. +""" import gspread, os from loguru import logger diff --git a/src/auto_archiver/formatters/__init__.py b/src/auto_archiver/formatters/__init__.py index ce8afac..af96f15 100644 --- a/src/auto_archiver/formatters/__init__.py +++ b/src/auto_archiver/formatters/__init__.py @@ -1,3 +1,4 @@ +""" Formatters for the output of the content. 
""" from .formatter import Formatter from .html_formatter import HtmlFormatter from .mute_formatter import MuteFormatter \ No newline at end of file diff --git a/src/auto_archiver/storages/__init__.py b/src/auto_archiver/storages/__init__.py index 5f768a6..bff83e6 100644 --- a/src/auto_archiver/storages/__init__.py +++ b/src/auto_archiver/storages/__init__.py @@ -1,3 +1,6 @@ +""" This module contains the storage classes for the auto-archiver. + +""" from .storage import Storage from .s3 import S3Storage from .local import LocalStorage diff --git a/src/auto_archiver/utils/__init__.py b/src/auto_archiver/utils/__init__.py index fe5cb58..1e8669c 100644 --- a/src/auto_archiver/utils/__init__.py +++ b/src/auto_archiver/utils/__init__.py @@ -1,3 +1,4 @@ +""" Auto Archiver Utilities. """ # we need to explicitly expose the available imports here from .gworksheet import GWorksheet from .misc import * diff --git a/src/auto_archiver/utils/webdriver.py b/src/auto_archiver/utils/webdriver.py index 7e95330..cf84c35 100644 --- a/src/auto_archiver/utils/webdriver.py +++ b/src/auto_archiver/utils/webdriver.py @@ -1,3 +1,4 @@ +""" This Webdriver class acts as a context manager for the selenium webdriver. """ from __future__ import annotations from selenium import webdriver from selenium.common.exceptions import TimeoutException From 235da33a1a176a01d2102e6d9fef6ae0bb7b8064 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Thu, 16 Jan 2025 09:24:46 +0000 Subject: [PATCH 3/8] Update .readthedocs.yaml path --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index c8c07ac..9f67835 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -19,4 +19,4 @@ build: sphinx: - configuration: docs/conf.py + configuration: docs/source/conf.py From bbb3269c2be5203ce1856c1f7172ad6ade2557d9 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Thu, 16 Jan 2025 09:30:32 +0000 Subject: [PATCH 4/8] Changes from main. 
--- src/auto_archiver/archivers/tiktok_archiver.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/auto_archiver/archivers/tiktok_archiver.py b/src/auto_archiver/archivers/tiktok_archiver.py index 3232032..fac67d1 100644 --- a/src/auto_archiver/archivers/tiktok_archiver.py +++ b/src/auto_archiver/archivers/tiktok_archiver.py @@ -1,5 +1,4 @@ import json, os, traceback -import tiktok_downloader from loguru import logger From a6aacfa3fbfb9722479383b992909fd03e091f37 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Thu, 16 Jan 2025 09:31:50 +0000 Subject: [PATCH 5/8] Add example pre-generated configs.rst --- docs/source/configs.rst | 741 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 741 insertions(+) create mode 100644 docs/source/configs.rst diff --git a/docs/source/configs.rst b/docs/source/configs.rst new file mode 100644 index 0000000..9f793e1 --- /dev/null +++ b/docs/source/configs.rst @@ -0,0 +1,741 @@ +Configs +======= + +This section documents all configuration options available for various components. + +InstagramAPIArchiver +-------------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - access_token + - None + - a valid instagrapi-api token + * - api_endpoint + - None + - API endpoint to use + * - full_profile + - False + - if true, will download all posts, tagged posts, stories, and highlights for a profile, if false, will only download the profile pic and information. + * - full_profile_max_posts + - 0 + - Use to limit the number of posts to download when full_profile is true. 0 means no limit. 
limit is applied softly since posts are fetched in batch, once to: posts, tagged posts, and highlights + * - minimize_json_output + - True + - if true, will remove empty values from the json output + +InstagramArchiver +----------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - username + - None + - a valid Instagram username + * - password + - None + - the corresponding Instagram account password + * - download_folder + - instaloader + - name of a folder to temporarily download content to + * - session_file + - secrets/instaloader.session + - path to the instagram session which saves session credentials + +InstagramTbotArchiver +--------------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - api_id + - None + - telegram API_ID value, go to https://my.telegram.org/apps + * - api_hash + - None + - telegram API_HASH value, go to https://my.telegram.org/apps + * - session_file + - secrets/anon-insta + - optional, records the telegram login session for future usage, '.session' will be appended to the provided value. + * - timeout + - 45 + - timeout to fetch the instagram content in seconds. + +TelethonArchiver +---------------- + +The following table lists all configuration options for this component: + +.. 
list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - api_id + - None + - telegram API_ID value, go to https://my.telegram.org/apps + * - api_hash + - None + - telegram API_HASH value, go to https://my.telegram.org/apps + * - bot_token + - None + - optional, but allows access to more content such as large videos, talk to @botfather + * - session_file + - secrets/anon + - optional, records the telegram login session for future usage, '.session' will be appended to the provided value. + * - join_channels + - True + - disables the initial setup with channel_invites config, useful if you have a lot and get stuck + * - channel_invites + - {} + - (JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup + +TwitterApiArchiver +------------------ + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - bearer_token + - None + - [deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret + * - bearer_tokens + - [] + - a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. 
CSV of bearer tokens if provided via the command line + * - consumer_key + - None + - twitter API consumer_key + * - consumer_secret + - None + - twitter API consumer_secret + * - access_token + - None + - twitter API access_token + * - access_secret + - None + - twitter API access_secret + +VkArchiver +---------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - username + - None + - valid VKontakte username + * - password + - None + - valid VKontakte password + * - session_file + - secrets/vk_config.v2.json + - valid VKontakte password + +YoutubeDLArchiver +----------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - facebook_cookie + - None + - optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx' + * - subtitles + - True + - download subtitles if available + * - comments + - False + - download all comments if available, may lead to large metadata + * - livestreams + - False + - if set, will download live streams, otherwise will skip them; see --max-filesize for more control + * - live_from_start + - False + - if set, will download live streams from their earliest available moment, otherwise starts now. + * - proxy + - + - http/socks (https seems to not work atm) proxy to use for the webdriver, eg https://proxy-user:password@proxy-ip:port + * - end_means_success + - True + - if True, any archived content will mean a 'success', if False this archiver will not return a 'success' stage; this is useful for cases when the yt-dlp will archive a video but ignore other types of content like images or text only pages that the subsequent archivers can retrieve.
+ * - allow_playlist + - False + - If True will also download playlists, set to False if the expectation is to download a single video. + * - max_downloads + - inf + - Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit. + * - cookies_from_browser + - None + - optional browser for ytdl to extract cookies from, can be one of: brave, chrome, chromium, edge, firefox, opera, safari, vivaldi, whale + * - cookie_file + - None + - optional cookie file to use for Youtube, see instructions here on how to export from your browser: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp + +AAApiDb +------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - api_endpoint + - None + - API endpoint where calls are made to + * - api_token + - None + - API Bearer token. + * - public + - False + - whether the URL should be publicly available via the API + * - author_id + - None + - which email to assign as author + * - group_id + - None + - which group of users have access to the archive in case public=false as author + * - allow_rearchive + - True + - if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived + * - store_results + - True + - when set, will send the results to the API database. + * - tags + - [] + - what tags to add to the archived URL + +AtlosDb +------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - api_token + - None + - An Atlos API token.
For more information, see https://docs.atlos.org/technical/api/ + * - atlos_url + - https://platform.atlos.org + - The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash. + +CSVDb +----- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - csv_file + - db.csv + - CSV file name + +HashEnricher +------------ + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - algorithm + - SHA-256 + - hash algorithm to use + * - chunksize + - 16000000 + - number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB + +ScreenshotEnricher +------------------ + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - width + - 1280 + - width of the screenshots + * - height + - 720 + - height of the screenshots + * - timeout + - 60 + - timeout for taking the screenshot + * - sleep_before_screenshot + - 4 + - seconds to wait for the pages to load before taking screenshot + * - http_proxy + - + - http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port + * - save_to_pdf + - False + - save the page as pdf along with the screenshot. PDF saving options can be adjusted with the 'print_options' parameter + * - print_options + - {} + - options to pass to the pdf printer + +SSLEnricher +----------- + +The following table lists all configuration options for this component: + +.. 
list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - skip_when_nothing_archived + - True + - if true, will skip enriching when no media is archived + +ThumbnailEnricher +----------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - thumbnails_per_minute + - 60 + - how many thumbnails to generate per minute of video, can be limited by max_thumbnails + * - max_thumbnails + - 16 + - limit the number of thumbnails to generate per video, 0 means no limit + +TimestampingEnricher +-------------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - tsa_urls + - ['http://timestamp.digicert.com', 'http://timestamp.identrust.com', 'http://timestamp.globalsign.com/tsa/r6advanced1', 'http://tss.accv.es:8318/tsa'] + - List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line. + +WaczArchiverEnricher +-------------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - profile + - None + - browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles). + * - docker_commands + - None + - if a custom docker invocation is needed + * - timeout + - 120 + - timeout for WACZ generation in seconds + * - extract_media + - False + - If enabled all the images/videos/audio present in the WACZ archive will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched.
+ * - extract_screenshot + - True + - If enabled the screenshot captured by browsertrix will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched. + * - socks_proxy_host + - None + - SOCKS proxy host for browsertrix-crawler, use in combination with socks_proxy_port. eg: user:password@host + * - socks_proxy_port + - None + - SOCKS proxy port for browsertrix-crawler, use in combination with socks_proxy_host. eg 1234 + * - proxy_server + - None + - SOCKS server proxy URL, in development + +WaybackArchiverEnricher +----------------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - timeout + - 15 + - seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually. + * - if_not_archived_within + - None + - only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA + * - key + - None + - wayback API key. to get credentials visit https://archive.org/account/s3.php + * - secret + - None + - wayback API secret. to get credentials visit https://archive.org/account/s3.php + * - proxy_http + - None + - http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port + * - proxy_https + - None + - https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port + +WhisperEnricher +--------------- + +The following table lists all configuration options for this component: + +..
list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - api_endpoint + - None + - WhisperApi api endpoint, eg: https://whisperbox-api.com/api/v1, a deployment of https://github.com/bellingcat/whisperbox-transcribe. + * - api_key + - None + - WhisperApi api key for authentication + * - include_srt + - False + - Whether to include a subtitle SRT (SubRip Subtitle file) for the video (can be used in video players). + * - timeout + - 90 + - How many seconds to wait at most for a successful job completion. + * - action + - translate + - which Whisper operation to execute + +AtlosFeeder +----------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - api_token + - None + - An Atlos API token. For more information, see https://docs.atlos.org/technical/api/ + * - atlos_url + - https://platform.atlos.org + - The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash. + +CLIFeeder +--------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - urls + - None + - URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml + +GsheetsFeeder +------------- + +The following table lists all configuration options for this component: + +..
list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - sheet + - None + - name of the sheet to archive + * - sheet_id + - None + - (alternative to sheet name) the id of the sheet to archive + * - header + - 1 + - index of the header row (starts at 1) + * - service_account + - secrets/service_account.json + - service account JSON file path + * - columns + - {'url': 'link', 'status': 'archive status', 'folder': 'destination folder', 'archive': 'archive location', 'date': 'archive date', 'thumbnail': 'thumbnail', 'timestamp': 'upload timestamp', 'title': 'upload title', 'text': 'text content', 'screenshot': 'screenshot', 'hash': 'hash', 'pdq_hash': 'perceptual hashes', 'wacz': 'wacz', 'replaywebpage': 'replaywebpage'} + - names of columns in the google sheet (stringified JSON object) + * - allow_worksheets + - set() + - (CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed + * - block_worksheets + - set() + - (CSV) explicitly block some worksheets from being processed + * - use_sheet_names_in_stored_paths + - True + - if True the stored files path will include 'workbook_name/worksheet_name/...' + +HtmlFormatter +------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - detect_thumbnails + - True + - if true will group by thumbnails generated by thumbnail enricher by id 'thumbnail_00' + +AtlosStorage +------------ + +The following table lists all configuration options for this component: + +.. 
list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - path_generator + - url + - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory. + * - filename_generator + - random + - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash. + * - api_token + - None + - An Atlos API token. For more information, see https://docs.atlos.org/technical/api/ + * - atlos_url + - https://platform.atlos.org + - The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash. + +GDriveStorage +------------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - path_generator + - url + - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory. + * - filename_generator + - random + - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash. + * - root_folder_id + - None + - root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID' + * - oauth_token + - None + - JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token over service_account. + * - service_account + - secrets/service_account.json + - service account JSON file path, same as used for Google Sheets. NOTE: storage used will count towards the developer account. 
+ +LocalStorage +------------ + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - path_generator + - url + - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory. + * - filename_generator + - random + - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash. + * - save_to + - ./archived + - folder where to save archived content + * - save_absolute + - False + - whether the path to the stored file is absolute or relative in the output result inc. formatters (WARN: leaks the file structure) + +S3Storage +--------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - path_generator + - url + - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory. + * - filename_generator + - random + - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash. + * - bucket + - None + - S3 bucket name + * - region + - None + - S3 region name + * - key + - None + - S3 API key + * - secret + - None + - S3 API secret + * - random_no_duplicate + - False + - if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. 
Creates a new root folder path `no-dups/` + * - endpoint_url + - https://{region}.digitaloceanspaces.com + - S3 bucket endpoint, {region} are inserted at runtime + * - cdn_url + - https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key} + - S3 CDN url, {bucket}, {region} and {key} are inserted at runtime + * - private + - False + - if true S3 files will not be readable online + +Storage +------- + +The following table lists all configuration options for this component: + +.. list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - path_generator + - url + - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory. + * - filename_generator + - random + - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash. + +Gsheets +------- + +The following table lists all configuration options for this component: + +.. 
list-table:: Configuration Options + :header-rows: 1 + :widths: 25 20 55 + + * - **Key** + - **Default** + - **Description** + * - sheet + - None + - name of the sheet to archive + * - sheet_id + - None + - (alternative to sheet name) the id of the sheet to archive + * - header + - 1 + - index of the header row (starts at 1) + * - service_account + - secrets/service_account.json + - service account JSON file path + * - columns + - {'url': 'link', 'status': 'archive status', 'folder': 'destination folder', 'archive': 'archive location', 'date': 'archive date', 'thumbnail': 'thumbnail', 'timestamp': 'upload timestamp', 'title': 'upload title', 'text': 'text content', 'screenshot': 'screenshot', 'hash': 'hash', 'pdq_hash': 'perceptual hashes', 'wacz': 'wacz', 'replaywebpage': 'replaywebpage'} + - names of columns in the google sheet (stringified JSON object) + From 6fabe2a1896e3e56e6ccd7e5972f992517691c6d Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Thu, 16 Jan 2025 09:56:54 +0000 Subject: [PATCH 6/8] Fixed twitter_archiver.py changes. 
--- src/auto_archiver/archivers/twitter_archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_archiver/archivers/twitter_archiver.py b/src/auto_archiver/archivers/twitter_archiver.py index 2303302..995910b 100644 --- a/src/auto_archiver/archivers/twitter_archiver.py +++ b/src/auto_archiver/archivers/twitter_archiver.py @@ -1,4 +1,4 @@ -import re, requests, mimetypes, json +import re, requests, mimetypes, json, math from typing import Union from datetime import datetime from loguru import logger From 170f8d18a64bb9b84141087d9b684d9362bff6cd Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Thu, 16 Jan 2025 20:46:10 +0000 Subject: [PATCH 7/8] Add instructions to README.md, include build directories in .gitignore and do a bit more tidying, --- .gitignore | 2 ++ README.md | 42 ++++++++++++++++++++++++++++++++++ docs/source/configurations.rst | 2 -- docs/source/index.rst | 2 +- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index f545ac2..7c6bf08 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,5 @@ logs* *.csv archived/ dist* +docs/_build/ +docs/source/autoapi/ diff --git a/README.md b/README.md index 90da1af..1bd6ddd 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,8 @@ Clone and run: 2. `poetry install` 3. `poetry run python -m src.auto_archiver --config secrets/orchestration.yaml` +Note: Add the plugin [poetry-plugin-shell](https://github.com/python-poetry/poetry-plugin-shell) and run `poetry shell` to activate the virtual environment. +This allows you to run the auto-archiver without the `poetry run` prefix.
@@ -286,6 +288,46 @@ manual release to docker hub * `docker image tag auto-archiver bellingcat/auto-archiver:latest` * `docker push bellingcat/auto-archiver` + +### Building the Docs + +The documentation is built using [Sphinx](https://www.sphinx-doc.org/en/master/) and [AutoAPI](https://sphinx-autoapi.readthedocs.io/en/latest/) and hosted on ReadTheDocs. +To build the documentation locally, run the following commands: + +**Install required dependencies:** +- Install the docs group of dependencies: +```shell +# only the docs dependencies +poetry install --only docs + +# or for all dependencies +poetry install +``` +- Either use [poetry-plugin-shell](https://github.com/python-poetry/poetry-plugin-shell) to activate the virtual environment: `poetry shell` +- Or prepend the following commands with `poetry run` + +**Create the documentation:** +- Build the documentation: +``` +# Using makefile (Linux/macOS): +make -C docs html + +# or using sphinx directly (Windows/Linux/macOS): +sphinx-build -b html docs/source docs/_build/html +``` +- If you make significant changes and want a fresh build run: `make -C docs clean` to remove the old build files. + +**Viewing the documentation:** +```shell +# to open the documentation in your browser. +open docs/_build/html/index.html + +# or run autobuild to automatically update the documentation when you make changes +sphinx-autobuild docs/source docs/_build/html +``` + + + #### RELEASE * update version in [version.py](src/auto_archiver/version.py) * go to github releases > new release > use `vx.y.z` for matching version notation diff --git a/docs/source/configurations.rst b/docs/source/configurations.rst index 3fb482c..a466869 100644 --- a/docs/source/configurations.rst +++ b/docs/source/configurations.rst @@ -30,6 +30,4 @@ Configs This section of the documentation will show the custom configurations for the individual steps of the tool. -.. 
include:: configs.rst - diff --git a/docs/source/index.rst b/docs/source/index.rst index 22b6e1e..d6437e1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,8 +20,8 @@ Auto Archiver documentation :hidden: :caption: Contents: + user_guidelines developer_guidelines configurations - user_guidelines configs From e83ccc0d7f328fe7b5b49b24da3e6861a3b04664 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 21 Jan 2025 09:48:46 +0000 Subject: [PATCH 8/8] Cleaning up configs reference and module level. --- docs/source/{ => _auto}/configs.rst | 3 ++- docs/source/configurations.rst | 1 + docs/source/index.rst | 3 +-- src/__init__.py | 0 4 files changed, 4 insertions(+), 3 deletions(-) rename docs/source/{ => _auto}/configs.rst (99%) delete mode 100644 src/__init__.py diff --git a/docs/source/configs.rst b/docs/source/_auto/configs.rst similarity index 99% rename from docs/source/configs.rst rename to docs/source/_auto/configs.rst index 9f793e1..f6e81f0 100644 --- a/docs/source/configs.rst +++ b/docs/source/_auto/configs.rst @@ -1,5 +1,6 @@ + Configs -======= +------- This section documents all configuration options available for various components. diff --git a/docs/source/configurations.rst b/docs/source/configurations.rst index a466869..85d7922 100644 --- a/docs/source/configurations.rst +++ b/docs/source/configurations.rst @@ -29,5 +29,6 @@ Configs This section of the documentation will show the custom configurations for the individual steps of the tool. +.. include:: _auto/configs.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index d6437e1..52449b8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -16,12 +16,11 @@ Auto Archiver documentation .. toctree:: - :maxdepth: 2 + :maxdepth: 1 :hidden: :caption: Contents: user_guidelines developer_guidelines configurations - configs diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000