diff --git a/.github/workflows/docker-publish.yaml b/.github/workflows/docker-publish.yaml
index 379aaaa..4d232e2 100644
--- a/.github/workflows/docker-publish.yaml
+++ b/.github/workflows/docker-publish.yaml
@@ -25,22 +25,22 @@ jobs:
uses: actions/checkout@v3
- name: Set up QEMU
- uses: docker/setup-qemu-action@v1
+ uses: docker/setup-qemu-action@v3
# https://github.com/docker/setup-buildx-action
-
+
- name: Set up Docker Buildx
id: buildx
- uses: docker/setup-buildx-action@v1
-
+ uses: docker/setup-buildx-action@v3
+
- name: Log in to Docker Hub
- uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+ uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
-
+
- name: Extract metadata (tags, labels) for Docker
id: meta
- uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+ uses: docker/metadata-action@369eb591f429131d6889c46b94e711f089e6ca96
with:
images: bellingcat/auto-archiver
diff --git a/.github/workflows/python-publish.yaml b/.github/workflows/python-publish.yaml
index b63a560..5badd29 100644
--- a/.github/workflows/python-publish.yaml
+++ b/.github/workflows/python-publish.yaml
@@ -22,27 +22,20 @@ jobs:
steps:
- name: Checkout Repository
- uses: actions/checkout@v3
-
- - name: Extract Python Version from pyproject.toml
- id: python-version
- run: |
- version=$(grep 'python =' pyproject.toml | awk -F'"' '{print $2}' | tr -d '^~<=>')
- echo "python-version=$version" >> $GITHUB_ENV
+ uses: actions/checkout@v4
- name: Set up Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
- python-version: ${{ env.python-version }}
+ python-version-file: pyproject.toml
- name: Install Poetry
run: |
- python -m pip install --upgrade pip
- python -m pip install "poetry>=2.0.0,<3.0.0"
+ pipx install "poetry>=2.0.0,<3.0.0"
- name: Install dependencies
run: |
- poetry install --no-root
+ poetry install --no-interaction --no-root
- name: Build the package
run: |
diff --git a/.gitignore b/.gitignore
index f545ac2..7c6bf08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,5 @@ logs*
*.csv
archived/
dist*
+docs/_build/
+docs/source/autoapi/
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..9f67835
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,22 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.10"
+ jobs:
+ post_install:
+ - pip install poetry
+ # https://python-poetry.org/docs/managing-dependencies/#dependency-groups
+ # VIRTUAL_ENV needs to be set manually for now.
+ # See https://github.com/readthedocs/readthedocs.org/pull/11152/
+ - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --only docs
+
+
+sphinx:
+ configuration: docs/source/conf.py
diff --git a/Dockerfile b/Dockerfile
index 3b1e252..0ecc7f3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,7 +13,7 @@ RUN add-apt-repository ppa:mozillateam/ppa && \
apt-get install -y --no-install-recommends gcc ffmpeg fonts-noto exiftool && \
apt-get install -y --no-install-recommends firefox-esr && \
ln -s /usr/bin/firefox-esr /usr/bin/firefox && \
- wget https://github.com/mozilla/geckodriver/releases/download/v0.33.0/geckodriver-v0.33.0-linux64.tar.gz && \
+ wget https://github.com/mozilla/geckodriver/releases/download/v0.35.0/geckodriver-v0.35.0-linux64.tar.gz && \
tar -xvzf geckodriver* -C /usr/local/bin && \
chmod +x /usr/local/bin/geckodriver && \
rm geckodriver-v* && \
diff --git a/README.md b/README.md
index 90da1af..1bd6ddd 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,8 @@ Clone and run:
2. `poetry install`
3. `poetry run python -m src.auto_archiver --config secrets/orchestration.yaml`
+Note: Install the [poetry-plugin-shell](https://github.com/python-poetry/poetry-plugin-shell) plugin and run `poetry shell` to activate the virtual environment.
+This allows you to run the auto-archiver without the `poetry run` prefix.
@@ -286,6 +288,46 @@ manual release to docker hub
* `docker image tag auto-archiver bellingcat/auto-archiver:latest`
* `docker push bellingcat/auto-archiver`
+
+### Building the Docs
+
+The documentation is built using [Sphinx](https://www.sphinx-doc.org/en/master/) and [AutoAPI](https://sphinx-autoapi.readthedocs.io/en/latest/) and hosted on ReadTheDocs.
+To build the documentation locally, run the following commands:
+
+**Install required dependencies:**
+- Install the docs group of dependencies:
+```shell
+# only the docs dependencies
+poetry install --only docs
+
+# or for all dependencies
+poetry install
+```
+- Either use [poetry-plugin-shell](https://github.com/python-poetry/poetry-plugin-shell) to activate the virtual environment: `poetry shell`
+- Or prepend the following commands with `poetry run`
+
+**Create the documentation:**
+- Build the documentation:
+```shell
+# Using makefile (Linux/macOS):
+make -C docs html
+
+# or using sphinx directly (Windows/Linux/macOS):
+sphinx-build -b html docs/source docs/_build/html
+```
+- If you make significant changes and want a fresh build, run `make -C docs clean` to remove the old build files.
+
+**Viewing the documentation:**
+```shell
+# open the documentation in your browser
+open docs/_build/html/index.html
+
+# or run autobuild to automatically update the documentation when you make changes
+sphinx-autobuild docs/source docs/_build/html
+```
+
+
+
#### RELEASE
* update version in [version.py](src/auto_archiver/version.py)
* go to github releases > new release > use `vx.y.z` for matching version notation
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..92dd33a
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = source
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/source/_auto/configs.rst b/docs/source/_auto/configs.rst
new file mode 100644
index 0000000..f6e81f0
--- /dev/null
+++ b/docs/source/_auto/configs.rst
@@ -0,0 +1,742 @@
+
+Configs
+-------
+
+This section documents all configuration options available for various components.
+
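+In an orchestration file these options normally live under the relevant step's name. The snippet below is only an illustrative sketch (the ``steps``/``configurations`` layout and the ``hash_enricher`` step name are assumptions; check ``example.orchestration.yaml`` for the authoritative format):
+
+.. code-block:: yaml
+
+   # hypothetical excerpt of an orchestration file
+   steps:
+     enrichers: [hash_enricher]     # enable the step
+   configurations:
+     hash_enricher:                 # then override its options
+       algorithm: "SHA-256"         # hash algorithm to use
+       chunksize: 16000000          # bytes read per chunk
+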
+InstagramAPIArchiver
+--------------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - access_token
+ - None
+ - a valid instagrapi-api token
+ * - api_endpoint
+ - None
+ - API endpoint to use
+ * - full_profile
+ - False
+ - if true, will download all posts, tagged posts, stories, and highlights for a profile, if false, will only download the profile pic and information.
+ * - full_profile_max_posts
+ - 0
+ - Use to limit the number of posts to download when full_profile is true. 0 means no limit. The limit is applied loosely, since posts are fetched in batches, once each for posts, tagged posts, and highlights.
+ * - minimize_json_output
+ - True
+ - if true, will remove empty values from the json output
+
+InstagramArchiver
+-----------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - username
+ - None
+ - a valid Instagram username
+ * - password
+ - None
+ - the corresponding Instagram account password
+ * - download_folder
+ - instaloader
+ - name of a folder to temporarily download content to
+ * - session_file
+ - secrets/instaloader.session
+ - path to the instagram session which saves session credentials
+
+InstagramTbotArchiver
+---------------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - api_id
+ - None
+ - telegram API_ID value, go to https://my.telegram.org/apps
+ * - api_hash
+ - None
+ - telegram API_HASH value, go to https://my.telegram.org/apps
+ * - session_file
+ - secrets/anon-insta
+ - optional, records the telegram login session for future usage, '.session' will be appended to the provided value.
+ * - timeout
+ - 45
+ - timeout to fetch the instagram content in seconds.
+
+TelethonArchiver
+----------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - api_id
+ - None
+ - telegram API_ID value, go to https://my.telegram.org/apps
+ * - api_hash
+ - None
+ - telegram API_HASH value, go to https://my.telegram.org/apps
+ * - bot_token
+ - None
+ - optional, but allows access to more content such as large videos, talk to @botfather
+ * - session_file
+ - secrets/anon
+ - optional, records the telegram login session for future usage, '.session' will be appended to the provided value.
+ * - join_channels
+ - True
+ - disables the initial setup with channel_invites config, useful if you have a lot and get stuck
+ * - channel_invites
+ - {}
+ - (JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup
+
+TwitterApiArchiver
+------------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - bearer_token
+ - None
+ - [deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret
+ * - bearer_tokens
+ - []
+ - a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line
+ * - consumer_key
+ - None
+ - twitter API consumer_key
+ * - consumer_secret
+ - None
+ - twitter API consumer_secret
+ * - access_token
+ - None
+ - twitter API access_token
+ * - access_secret
+ - None
+ - twitter API access_secret
+
+VkArchiver
+----------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - username
+ - None
+ - valid VKontakte username
+ * - password
+ - None
+ - valid VKontakte password
+ * - session_file
+ - secrets/vk_config.v2.json
+ - path to the VKontakte session file which saves session credentials
+
+YoutubeDLArchiver
+-----------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - facebook_cookie
+ - None
+ - optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'
+ * - subtitles
+ - True
+ - download subtitles if available
+ * - comments
+ - False
+ - download all comments if available, may lead to large metadata
+ * - livestreams
+ - False
+ - if set, will download live streams, otherwise will skip them; see --max-filesize for more control
+ * - live_from_start
+ - False
+ - if set, will download live streams from their earliest available moment, otherwise starts now.
+ * - proxy
+ -
+ - http/socks (https seems to not work atm) proxy to use for the webdriver, eg https://proxy-user:password@proxy-ip:port
+ * - end_means_success
+ - True
+ - if True, any archived content will mean a 'success', if False this archiver will not return a 'success' stage; this is useful for cases where yt-dlp archives a video but ignores other types of content like images or text-only pages that the subsequent archivers can retrieve.
+ * - allow_playlist
+ - False
+ - If True will also download playlists, set to False if the expectation is to download a single video.
+ * - max_downloads
+ - inf
+ - Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.
+ * - cookies_from_browser
+ - None
+ - optional browser for ytdl to extract cookies from, can be one of: brave, chrome, chromium, edge, firefox, opera, safari, vivaldi, whale
+ * - cookie_file
+ - None
+ - optional cookie file to use for Youtube, see instructions here on how to export from your browser: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
+
+AAApiDb
+-------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - api_endpoint
+ - None
+ - API endpoint where calls are made to
+ * - api_token
+ - None
+ - API Bearer token.
+ * - public
+ - False
+ - whether the URL should be publicly available via the API
+ * - author_id
+ - None
+ - which email to assign as author
+ * - group_id
+ - None
+ - which group of users have access to the archive in case public=false as author
+ * - allow_rearchive
+ - True
+ - if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived
+ * - store_results
+ - True
+ - when set, will send the results to the API database.
+ * - tags
+ - []
+ - what tags to add to the archived URL
+
+AtlosDb
+-------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - api_token
+ - None
+ - An Atlos API token. For more information, see https://docs.atlos.org/technical/api/
+ * - atlos_url
+ - https://platform.atlos.org
+ - The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.
+
+CSVDb
+-----
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - csv_file
+ - db.csv
+ - CSV file name
+
+HashEnricher
+------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - algorithm
+ - SHA-256
+ - hash algorithm to use
+ * - chunksize
+ - 16000000
+ - number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB
+
+ScreenshotEnricher
+------------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - width
+ - 1280
+ - width of the screenshots
+ * - height
+ - 720
+ - height of the screenshots
+ * - timeout
+ - 60
+ - timeout for taking the screenshot
+ * - sleep_before_screenshot
+ - 4
+ - seconds to wait for the pages to load before taking screenshot
+ * - http_proxy
+ -
+ - http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port
+ * - save_to_pdf
+ - False
+ - save the page as pdf along with the screenshot. PDF saving options can be adjusted with the 'print_options' parameter
+ * - print_options
+ - {}
+ - options to pass to the pdf printer
+
+SSLEnricher
+-----------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - skip_when_nothing_archived
+ - True
+ - if true, will skip enriching when no media is archived
+
+ThumbnailEnricher
+-----------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - thumbnails_per_minute
+ - 60
+ - how many thumbnails to generate per minute of video, can be limited by max_thumbnails
+ * - max_thumbnails
+ - 16
+ - limit the number of thumbnails to generate per video, 0 means no limit
+
+TimestampingEnricher
+--------------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - tsa_urls
+ - ['http://timestamp.digicert.com', 'http://timestamp.identrust.com', 'http://timestamp.globalsign.com/tsa/r6advanced1', 'http://tss.accv.es:8318/tsa']
+ - List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.
+
+WaczArchiverEnricher
+--------------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - profile
+ - None
+ - browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles).
+ * - docker_commands
+ - None
+ - if a custom docker invocation is needed
+ * - timeout
+ - 120
+ - timeout for WACZ generation in seconds
+ * - extract_media
+ - False
+ - If enabled all the images/videos/audio present in the WACZ archive will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched.
+ * - extract_screenshot
+ - True
+ - If enabled the screenshot captured by browsertrix will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched.
+ * - socks_proxy_host
+ - None
+ - SOCKS proxy host for browsertrix-crawler, use in combination with socks_proxy_port. eg: user:password@host
+ * - socks_proxy_port
+ - None
+ - SOCKS proxy port for browsertrix-crawler, use in combination with socks_proxy_host. eg 1234
+ * - proxy_server
+ - None
+ - SOCKS server proxy URL, in development
+
+WaybackArchiverEnricher
+-----------------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - timeout
+ - 15
+ - seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually.
+ * - if_not_archived_within
+ - None
+ - only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA
+ * - key
+ - None
+ - wayback API key. to get credentials visit https://archive.org/account/s3.php
+ * - secret
+ - None
+ - wayback API secret. to get credentials visit https://archive.org/account/s3.php
+ * - proxy_http
+ - None
+ - http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port
+ * - proxy_https
+ - None
+ - https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port
+
+WhisperEnricher
+---------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - api_endpoint
+ - None
+ - WhisperApi api endpoint, eg: https://whisperbox-api.com/api/v1, a deployment of https://github.com/bellingcat/whisperbox-transcribe.
+ * - api_key
+ - None
+ - WhisperApi api key for authentication
+ * - include_srt
+ - False
+ - Whether to include a subtitle SRT (SubRip Subtitle file) for the video (can be used in video players).
+ * - timeout
+ - 90
+ - How many seconds to wait at most for a successful job completion.
+ * - action
+ - translate
+ - which Whisper operation to execute
+
+AtlosFeeder
+-----------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - api_token
+ - None
+ - An Atlos API token. For more information, see https://docs.atlos.org/technical/api/
+ * - atlos_url
+ - https://platform.atlos.org
+ - The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.
+
+CLIFeeder
+---------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - urls
+ - None
+ - URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml
+
+GsheetsFeeder
+-------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - sheet
+ - None
+ - name of the sheet to archive
+ * - sheet_id
+ - None
+ - (alternative to sheet name) the id of the sheet to archive
+ * - header
+ - 1
+ - index of the header row (starts at 1)
+ * - service_account
+ - secrets/service_account.json
+ - service account JSON file path
+ * - columns
+ - {'url': 'link', 'status': 'archive status', 'folder': 'destination folder', 'archive': 'archive location', 'date': 'archive date', 'thumbnail': 'thumbnail', 'timestamp': 'upload timestamp', 'title': 'upload title', 'text': 'text content', 'screenshot': 'screenshot', 'hash': 'hash', 'pdq_hash': 'perceptual hashes', 'wacz': 'wacz', 'replaywebpage': 'replaywebpage'}
+ - names of columns in the google sheet (stringified JSON object)
+ * - allow_worksheets
+ - set()
+ - (CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed
+ * - block_worksheets
+ - set()
+ - (CSV) explicitly block some worksheets from being processed
+ * - use_sheet_names_in_stored_paths
+ - True
+ - if True the stored files path will include 'workbook_name/worksheet_name/...'
+
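+Because ``columns`` is a mapping (passed as a stringified JSON object on the command line), it is usually easier to set it in YAML. A hypothetical sketch, assuming the step is configured under a ``gsheet_feeder`` key:
+
+.. code-block:: yaml
+
+   # illustrative values only, adjust to your own sheet
+   gsheet_feeder:
+     sheet: "My Archiving Sheet"
+     header: 1
+     columns:                    # map internal names to your sheet's column headers
+       url: "link"
+       status: "archive status"
+       archive: "archive location"
+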
+HtmlFormatter
+-------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - detect_thumbnails
+ - True
+ - if true will group by thumbnails generated by thumbnail enricher by id 'thumbnail_00'
+
+AtlosStorage
+------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - path_generator
+ - url
+ - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.
+ * - filename_generator
+ - random
+ - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.
+ * - api_token
+ - None
+ - An Atlos API token. For more information, see https://docs.atlos.org/technical/api/
+ * - atlos_url
+ - https://platform.atlos.org
+ - The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.
+
+GDriveStorage
+-------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - path_generator
+ - url
+ - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.
+ * - filename_generator
+ - random
+ - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.
+ * - root_folder_id
+ - None
+ - root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'
+ * - oauth_token
+ - None
+ - JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account.
+ * - service_account
+ - secrets/service_account.json
+ - service account JSON file path, same as used for Google Sheets. NOTE: storage used will count towards the developer account.
+
+LocalStorage
+------------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - path_generator
+ - url
+ - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.
+ * - filename_generator
+ - random
+ - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.
+ * - save_to
+ - ./archived
+ - folder where to save archived content
+ * - save_absolute
+ - False
+ - whether the path to the stored file is absolute or relative in the output result inc. formatters (WARN: leaks the file structure)
+
+S3Storage
+---------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - path_generator
+ - url
+ - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.
+ * - filename_generator
+ - random
+ - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.
+ * - bucket
+ - None
+ - S3 bucket name
+ * - region
+ - None
+ - S3 region name
+ * - key
+ - None
+ - S3 API key
+ * - secret
+ - None
+ - S3 API secret
+ * - random_no_duplicate
+ - False
+ - if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. Creates a new root folder path `no-dups/`
+ * - endpoint_url
+ - https://{region}.digitaloceanspaces.com
+ - S3 bucket endpoint, {region} are inserted at runtime
+ * - cdn_url
+ - https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}
+ - S3 CDN url, {bucket}, {region} and {key} are inserted at runtime
+ * - private
+ - False
+ - if true S3 files will not be readable online
+
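+The ``{region}``, ``{bucket}`` and ``{key}`` placeholders in ``endpoint_url`` and ``cdn_url`` are filled in at runtime, so a non-DigitalOcean provider mainly needs different templates. A hypothetical sketch, assuming the step is configured under an ``s3_storage`` key and AWS-style endpoints:
+
+.. code-block:: yaml
+
+   # illustrative sketch, adjust names and endpoints to your provider
+   s3_storage:
+     bucket: "my-archive-bucket"
+     region: "eu-central-1"
+     key: "AKIA..."              # S3 API key
+     secret: "..."               # S3 API secret
+     # placeholders are substituted at runtime
+     endpoint_url: "https://s3.{region}.amazonaws.com"
+     cdn_url: "https://{bucket}.s3.{region}.amazonaws.com/{key}"
+     private: false
+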
+Storage
+-------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - path_generator
+ - url
+ - how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.
+ * - filename_generator
+ - random
+ - how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.
+
+Gsheets
+-------
+
+The following table lists all configuration options for this component:
+
+.. list-table:: Configuration Options
+ :header-rows: 1
+ :widths: 25 20 55
+
+ * - **Key**
+ - **Default**
+ - **Description**
+ * - sheet
+ - None
+ - name of the sheet to archive
+ * - sheet_id
+ - None
+ - (alternative to sheet name) the id of the sheet to archive
+ * - header
+ - 1
+ - index of the header row (starts at 1)
+ * - service_account
+ - secrets/service_account.json
+ - service account JSON file path
+ * - columns
+ - {'url': 'link', 'status': 'archive status', 'folder': 'destination folder', 'archive': 'archive location', 'date': 'archive date', 'thumbnail': 'thumbnail', 'timestamp': 'upload timestamp', 'title': 'upload title', 'text': 'text content', 'screenshot': 'screenshot', 'hash': 'hash', 'pdq_hash': 'perceptual hashes', 'wacz': 'wacz', 'replaywebpage': 'replaywebpage'}
+ - names of columns in the google sheet (stringified JSON object)
+
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..3168b22
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,64 @@
+# Configuration file for the Sphinx documentation builder.
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+from importlib.metadata import metadata
+
+package_metadata = metadata("auto-archiver")
+project = package_metadata["name"]
+author = package_metadata["author"]
+release = package_metadata["version"]
+
+
+# -- General configuration ---------------------------------------------------
+extensions = [
+ "autoapi.extension", # Generate API documentation from docstrings
+ "myst_parser", # Markdown support
+ 'sphinxcontrib.mermaid', # Mermaid diagrams
+ "sphinx.ext.viewcode", # Source code links
+ "sphinx.ext.napoleon", # Google-style and NumPy-style docstrings
+ # "sphinx.ext.autodoc", # Include custom docstrings
+ # 'sphinx.ext.autosummary', # Summarize module/class/function docs
+]
+
+templates_path = ['_templates']
+exclude_patterns = []
+
+
+# -- AutoAPI Configuration ---------------------------------------------------
+autoapi_type = 'python'
+autoapi_dirs = ["../../src"]
+autodoc_typehints = "signature" # Include type hints in the signature
+autoapi_ignore = [] # Ignore specific modules
+autoapi_keep_files = True # Keep the intermediate generated files for debugging
+autoapi_add_toctree_entry = True # Include API docs in the TOC
+autoapi_template_dir = None # Use default templates
+autoapi_options = [
+ "members",
+ "undoc-members",
+ "show-inheritance",
+ "show-module-summary",
+ "imported-members",
+]
+
+
+# -- Markdown Support --------------------------------------------------------
+myst_enable_extensions = [
+ "colon_fence", # ::: fences
+ "deflist", # Definition lists
+ "html_admonition", # HTML-style admonitions
+ "html_image", # Inline HTML images
+ "replacements", # Substitutions like (C)
+ "smartquotes", # Smart quotes
+ "linkify", # Auto-detect links
+ "substitution", # Text substitutions
+]
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".md": "markdown",
+}
+
+# -- Options for HTML output -------------------------------------------------
+html_theme = 'furo'
+# html_static_path = ['_static']
+
diff --git a/docs/source/configurations.rst b/docs/source/configurations.rst
new file mode 100644
index 0000000..85d7922
--- /dev/null
+++ b/docs/source/configurations.rst
@@ -0,0 +1,34 @@
+
+Configurations
+==============
+
+This section of the documentation provides guidelines for configuring the tool.
+
+File Reference
+--------------
+
+
+Below is the content of the ``example.orchestration.yaml`` file:
+
+.. raw:: html
+
+   <details>
+   <summary>View example.orchestration.yaml</summary>
+
+.. literalinclude:: ../../example.orchestration.yaml
+   :language: yaml
+   :caption: example.orchestration.yaml
+
+.. raw:: html
+
+   </details>
+
+
+Configs
+-------
+
+This section documents the custom configuration options for the individual steps of the tool.
+
+.. include:: _auto/configs.rst
+
+
diff --git a/docs/source/developer_guidelines.rst b/docs/source/developer_guidelines.rst
new file mode 100644
index 0000000..c0fdee0
--- /dev/null
+++ b/docs/source/developer_guidelines.rst
@@ -0,0 +1,6 @@
+
+Developer Guidelines
+====================
+
+This section of the documentation provides guidelines for developers who want to modify or contribute to the tool.
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..52449b8
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,26 @@
+.. auto-archiver documentation master file, created by
+ sphinx-quickstart on Sun Jan 12 20:35:50 2025.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Auto Archiver documentation
+===========================
+
+.. note::
+
+ This is a work in progress.
+
+
+.. include:: ../../README.md
+ :parser: myst
+
+
+.. toctree::
+ :maxdepth: 1
+ :hidden:
+ :caption: Contents:
+
+ user_guidelines
+ developer_guidelines
+ configurations
+
diff --git a/docs/source/user_guidelines.rst b/docs/source/user_guidelines.rst
new file mode 100644
index 0000000..93fb2f2
--- /dev/null
+++ b/docs/source/user_guidelines.rst
@@ -0,0 +1,11 @@
+
+User Guidelines
+===============
+
+This section of the documentation provides guidelines for users who want to use the tool,
+without needing to modify the code.
+For developer guidelines, see :doc:`developer_guidelines`.
+
+.. note::
+
+ This is a work in progress.
diff --git a/poetry.lock b/poetry.lock
index adb2726..40d108a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,5 +1,40 @@
# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand.
+[[package]]
+name = "alabaster"
+version = "1.0.0"
+description = "A light, configurable Sphinx theme"
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b"},
+ {file = "alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e"},
+]
+
+[[package]]
+name = "anyio"
+version = "4.8.0"
+description = "High level compatibility layer for multiple asynchronous event loop implementations"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a"},
+ {file = "anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a"},
+]
+
+[package.dependencies]
+exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
+idna = ">=2.8"
+sniffio = ">=1.1"
+typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
+
+[package.extras]
+doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"]
+test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"]
+trio = ["trio (>=0.26.1)"]
+
[[package]]
name = "asn1crypto"
version = "1.5.1"
@@ -12,6 +47,21 @@ files = [
{file = "asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c"},
]
+[[package]]
+name = "astroid"
+version = "3.3.8"
+description = "An abstract syntax tree for Python with inference support."
+optional = false
+python-versions = ">=3.9.0"
+groups = ["docs"]
+files = [
+ {file = "astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c"},
+ {file = "astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b"},
+]
+
+[package.dependencies]
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
+
[[package]]
name = "attrs"
version = "24.3.0"
@@ -63,13 +113,28 @@ files = [
pycodestyle = ">=2.12.0"
tomli = {version = "*", markers = "python_version < \"3.11\""}
+[[package]]
+name = "babel"
+version = "2.16.0"
+description = "Internationalization utilities"
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"},
+ {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"},
+]
+
+[package.extras]
+dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
+
[[package]]
name = "beautifulsoup4"
version = "4.12.3"
description = "Screen-scraping library"
optional = false
python-versions = ">=3.6.0"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
@@ -87,18 +152,18 @@ lxml = ["lxml"]
[[package]]
name = "boto3"
-version = "1.35.98"
+version = "1.35.99"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
- {file = "boto3-1.35.98-py3-none-any.whl", hash = "sha256:d0224e1499d7189b47aa7f469d96522d98df6f5702fccb20a95a436582ebcd9d"},
- {file = "boto3-1.35.98.tar.gz", hash = "sha256:4b6274b4fe9d7113f978abea66a1f20c8a397c268c9d1b2a6c96b14a256da4a5"},
+ {file = "boto3-1.35.99-py3-none-any.whl", hash = "sha256:83e560faaec38a956dfb3d62e05e1703ee50432b45b788c09e25107c5058bd71"},
+ {file = "boto3-1.35.99.tar.gz", hash = "sha256:e0abd794a7a591d90558e92e29a9f8837d25ece8e3c120e530526fe27eba5fca"},
]
[package.dependencies]
-botocore = ">=1.35.98,<1.36.0"
+botocore = ">=1.35.99,<1.36.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.10.0,<0.11.0"
@@ -107,14 +172,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
-version = "1.35.98"
+version = "1.35.99"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
- {file = "botocore-1.35.98-py3-none-any.whl", hash = "sha256:4f1c0b687488663a774ad3a5e81a5f94fae1bcada2364cfdc48482c4dbf794d5"},
- {file = "botocore-1.35.98.tar.gz", hash = "sha256:d11742b3824bdeac3c89eeeaf5132351af41823bbcef8fc15e95c8250b1de09c"},
+ {file = "botocore-1.35.99-py3-none-any.whl", hash = "sha256:b22d27b6b617fc2d7342090d6129000af2efd20174215948c0d7ae2da0fab445"},
+ {file = "botocore-1.35.99.tar.gz", hash = "sha256:1eab44e969c39c5f3d9a3104a0836c24715579a455f12b3979a31d7cde51b3c3"},
]
[package.dependencies]
@@ -294,7 +359,7 @@ version = "2024.12.14"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.6"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"},
{file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"},
@@ -402,7 +467,7 @@ version = "3.4.1"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false
python-versions = ">=3.7"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"},
{file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"},
@@ -498,13 +563,28 @@ files = [
{file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"},
]
+[[package]]
+name = "click"
+version = "8.1.8"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+groups = ["docs"]
+files = [
+ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
+ {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["main", "dev"]
+groups = ["main", "dev", "docs"]
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
@@ -596,13 +676,25 @@ calendars = ["convertdate", "hijri-converter"]
fasttext = ["fasttext"]
langdetect = ["langdetect"]
+[[package]]
+name = "docutils"
+version = "0.21.2"
+description = "Docutils -- Python Documentation Utilities"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"},
+ {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"},
+]
+
[[package]]
name = "exceptiongroup"
version = "1.2.2"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
-groups = ["main", "dev"]
+groups = ["main", "dev", "docs"]
markers = "python_version < \"3.11\""
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
@@ -630,6 +722,24 @@ future = "*"
[package.extras]
dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4.6.1)", "pytest-mock (==1.10.4)", "tox (==3.12.1)"]
+[[package]]
+name = "furo"
+version = "2024.8.6"
+description = "A clean customisable Sphinx documentation theme."
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "furo-2024.8.6-py3-none-any.whl", hash = "sha256:6cd97c58b47813d3619e63e9081169880fbe331f0ca883c871ff1f3f11814f5c"},
+ {file = "furo-2024.8.6.tar.gz", hash = "sha256:b63e4cee8abfc3136d3bc03a3d45a76a850bada4d6374d24c1716b0e01394a01"},
+]
+
+[package.dependencies]
+beautifulsoup4 = "*"
+pygments = ">=2.7"
+sphinx = ">=6.0,<9.0"
+sphinx-basic-ng = ">=1.0.0.beta2"
+
[[package]]
name = "future"
version = "1.0.0"
@@ -786,7 +896,7 @@ version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.7"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
@@ -813,7 +923,7 @@ version = "3.10"
description = "Internationalized Domain Names in Applications (IDNA)"
optional = false
python-versions = ">=3.6"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
{file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
@@ -822,6 +932,18 @@ files = [
[package.extras]
all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
+[[package]]
+name = "imagesize"
+version = "1.4.1"
+description = "Getting image size from png/jpeg/jpeg2000/gif file"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["docs"]
+files = [
+ {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"},
+ {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"},
+]
+
[[package]]
name = "iniconfig"
version = "2.0.0"
@@ -857,7 +979,7 @@ version = "3.1.5"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"},
{file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"},
@@ -915,13 +1037,38 @@ win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
[package.extras]
dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov (==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"]
+[[package]]
+name = "markdown-it-py"
+version = "3.0.0"
+description = "Python port of markdown-it. Markdown parsing, done right!"
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
+ {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
+]
+
+[package.dependencies]
+mdurl = ">=0.1,<1.0"
+
+[package.extras]
+benchmarking = ["psutil", "pytest", "pytest-benchmark"]
+code-style = ["pre-commit (>=3.0,<4.0)"]
+compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
+linkify = ["linkify-it-py (>=1,<3)"]
+plugins = ["mdit-py-plugins"]
+profiling = ["gprof2dot"]
+rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
[[package]]
name = "markupsafe"
version = "3.0.2"
description = "Safely add untrusted strings to HTML/XML markup."
optional = false
python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"},
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"},
@@ -1006,6 +1153,38 @@ dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"]
docs = ["autodocsumm (==0.2.14)", "furo (==2024.8.6)", "sphinx (==8.1.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.0)", "sphinxext-opengraph (==0.9.1)"]
tests = ["pytest", "simplejson"]
+[[package]]
+name = "mdit-py-plugins"
+version = "0.4.2"
+description = "Collection of plugins for markdown-it-py"
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636"},
+ {file = "mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5"},
+]
+
+[package.dependencies]
+markdown-it-py = ">=1.0.0,<4.0.0"
+
+[package.extras]
+code-style = ["pre-commit"]
+rtd = ["myst-parser", "sphinx-book-theme"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+description = "Markdown URL utilities"
+optional = false
+python-versions = ">=3.7"
+groups = ["docs"]
+files = [
+ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
+ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
+]
+
[[package]]
name = "mutagen"
version = "1.47.0"
@@ -1030,6 +1209,33 @@ files = [
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]
+[[package]]
+name = "myst-parser"
+version = "4.0.0"
+description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser,"
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"},
+ {file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"},
+]
+
+[package.dependencies]
+docutils = ">=0.19,<0.22"
+jinja2 = "*"
+markdown-it-py = ">=3.0,<4.0"
+mdit-py-plugins = ">=0.4.1,<1.0"
+pyyaml = "*"
+sphinx = ">=7,<9"
+
+[package.extras]
+code-style = ["pre-commit (>=3.0,<4.0)"]
+linkify = ["linkify-it-py (>=2.0,<3.0)"]
+rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"]
+testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
+testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"]
+
[[package]]
name = "numpy"
version = "2.1.3"
@@ -1167,7 +1373,7 @@ version = "24.2"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main", "dev", "docs"]
files = [
{file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
{file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
@@ -1442,6 +1648,21 @@ files = [
{file = "pycryptodomex-3.21.0.tar.gz", hash = "sha256:222d0bd05381dd25c32dd6065c071ebf084212ab79bab4599ba9e6a3e0009e6c"},
]
+[[package]]
+name = "pygments"
+version = "2.19.1"
+description = "Pygments is a syntax highlighting package written in Python."
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"},
+ {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"},
+]
+
+[package.extras]
+windows-terminal = ["colorama (>=0.4.6)"]
+
[[package]]
name = "pyopenssl"
version = "24.2.1"
@@ -1592,7 +1813,7 @@ version = "6.0.2"
description = "YAML parser and emitter for Python"
optional = false
python-versions = ">=3.8"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
{file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
@@ -1759,7 +1980,7 @@ version = "2.32.3"
description = "Python HTTP for Humans."
optional = false
python-versions = ">=3.8"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
{file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
@@ -1881,12 +2102,24 @@ version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
+[[package]]
+name = "snowballstemmer"
+version = "2.2.0"
+description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms."
+optional = false
+python-versions = "*"
+groups = ["docs"]
+files = [
+ {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
+ {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
+]
+
[[package]]
name = "sortedcontainers"
version = "2.4.0"
@@ -1905,12 +2138,266 @@ version = "2.6"
description = "A modern CSS selector implementation for Beautiful Soup."
optional = false
python-versions = ">=3.8"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
{file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
]
+[[package]]
+name = "sphinx"
+version = "8.1.3"
+description = "Python documentation generator"
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"},
+ {file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"},
+]
+
+[package.dependencies]
+alabaster = ">=0.7.14"
+babel = ">=2.13"
+colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""}
+docutils = ">=0.20,<0.22"
+imagesize = ">=1.3"
+Jinja2 = ">=3.1"
+packaging = ">=23.0"
+Pygments = ">=2.17"
+requests = ">=2.30.0"
+snowballstemmer = ">=2.2"
+sphinxcontrib-applehelp = ">=1.0.7"
+sphinxcontrib-devhelp = ">=1.0.6"
+sphinxcontrib-htmlhelp = ">=2.0.6"
+sphinxcontrib-jsmath = ">=1.0.1"
+sphinxcontrib-qthelp = ">=1.0.6"
+sphinxcontrib-serializinghtml = ">=1.1.9"
+tomli = {version = ">=2", markers = "python_version < \"3.11\""}
+
+[package.extras]
+docs = ["sphinxcontrib-websupport"]
+lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"]
+test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"]
+
+[[package]]
+name = "sphinx-autoapi"
+version = "3.4.0"
+description = "Sphinx API documentation generator"
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92"},
+ {file = "sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c"},
+]
+
+[package.dependencies]
+astroid = [
+ {version = ">=2.7", markers = "python_version < \"3.12\""},
+ {version = ">=3.0.0a1", markers = "python_version >= \"3.12\""},
+]
+Jinja2 = "*"
+PyYAML = "*"
+sphinx = ">=6.1.0"
+
+[[package]]
+name = "sphinx-autobuild"
+version = "2024.10.3"
+description = "Rebuild Sphinx documentation on changes, with hot reloading in the browser."
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "sphinx_autobuild-2024.10.3-py3-none-any.whl", hash = "sha256:158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa"},
+ {file = "sphinx_autobuild-2024.10.3.tar.gz", hash = "sha256:248150f8f333e825107b6d4b86113ab28fa51750e5f9ae63b59dc339be951fb1"},
+]
+
+[package.dependencies]
+colorama = ">=0.4.6"
+sphinx = "*"
+starlette = ">=0.35"
+uvicorn = ">=0.25"
+watchfiles = ">=0.20"
+websockets = ">=11"
+
+[package.extras]
+test = ["httpx", "pytest (>=6)"]
+
+[[package]]
+name = "sphinx-basic-ng"
+version = "1.0.0b2"
+description = "A modern skeleton for Sphinx themes."
+optional = false
+python-versions = ">=3.7"
+groups = ["docs"]
+files = [
+ {file = "sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b"},
+ {file = "sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9"},
+]
+
+[package.dependencies]
+sphinx = ">=4.0"
+
+[package.extras]
+docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"]
+
+[[package]]
+name = "sphinx-copybutton"
+version = "0.5.2"
+description = "Add a copy button to each of your code cells."
+optional = false
+python-versions = ">=3.7"
+groups = ["docs"]
+files = [
+ {file = "sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd"},
+ {file = "sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e"},
+]
+
+[package.dependencies]
+sphinx = ">=1.8"
+
+[package.extras]
+code-style = ["pre-commit (==2.12.1)"]
+rtd = ["ipython", "myst-nb", "sphinx", "sphinx-book-theme", "sphinx-examples"]
+
+[[package]]
+name = "sphinxcontrib-applehelp"
+version = "2.0.0"
+description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5"},
+ {file = "sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-devhelp"
+version = "2.0.0"
+description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2"},
+ {file = "sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-htmlhelp"
+version = "2.1.0"
+description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8"},
+ {file = "sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["html5lib", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-jsmath"
+version = "1.0.1"
+description = "A sphinx extension which renders display math in HTML via JavaScript"
+optional = false
+python-versions = ">=3.5"
+groups = ["docs"]
+files = [
+ {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"},
+ {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"},
+]
+
+[package.extras]
+test = ["flake8", "mypy", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-mermaid"
+version = "1.0.0"
+description = "Mermaid diagrams in yours Sphinx powered docs"
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "sphinxcontrib_mermaid-1.0.0-py3-none-any.whl", hash = "sha256:60b72710ea02087f212028feb09711225fbc2e343a10d34822fe787510e1caa3"},
+ {file = "sphinxcontrib_mermaid-1.0.0.tar.gz", hash = "sha256:2e8ab67d3e1e2816663f9347d026a8dee4a858acdd4ad32dd1c808893db88146"},
+]
+
+[package.dependencies]
+pyyaml = "*"
+sphinx = "*"
+
+[package.extras]
+test = ["defusedxml", "myst-parser", "pytest", "ruff", "sphinx"]
+
+[[package]]
+name = "sphinxcontrib-qthelp"
+version = "2.0.0"
+description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb"},
+ {file = "sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["defusedxml (>=0.7.1)", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-serializinghtml"
+version = "2.0.0"
+description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"},
+ {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["pytest"]
+
+[[package]]
+name = "starlette"
+version = "0.45.2"
+description = "The little ASGI library that shines."
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "starlette-0.45.2-py3-none-any.whl", hash = "sha256:4daec3356fb0cb1e723a5235e5beaf375d2259af27532958e2d79df549dad9da"},
+ {file = "starlette-0.45.2.tar.gz", hash = "sha256:bba1831d15ae5212b22feab2f218bab6ed3cd0fc2dc1d4442443bb1ee52260e0"},
+]
+
+[package.dependencies]
+anyio = ">=3.6.2,<5"
+
+[package.extras]
+full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"]
+
[[package]]
name = "telethon"
version = "1.38.1"
@@ -1948,7 +2435,7 @@ version = "2.2.1"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["dev", "docs"]
markers = "python_version < \"3.11\""
files = [
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
@@ -2071,7 +2558,7 @@ version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
@@ -2142,7 +2629,7 @@ version = "2.3.0"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"},
{file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"},
@@ -2157,6 +2644,26 @@ h2 = ["h2 (>=4,<5)"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]

+[[package]]
+name = "uvicorn"
+version = "0.34.0"
+description = "The lightning-fast ASGI server."
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "uvicorn-0.34.0-py3-none-any.whl", hash = "sha256:023dc038422502fa28a09c7a30bf2b6991512da7dcdb8fd35fe57cfc154126f4"},
+ {file = "uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9"},
+]
+
+[package.dependencies]
+click = ">=7.0"
+h11 = ">=0.8"
+typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"]
+
[[package]]
name = "vk-api"
version = "11.9.9"
@@ -2223,6 +2730,90 @@ six = "*"
all = ["brotlipy"]
testing = ["hookdns", "httpbin (>=0.10.2)", "pytest", "pytest-cov", "requests", "urllib3 (>=1.26.5,<1.26.16)", "wsgiprox"]

+[[package]]
+name = "watchfiles"
+version = "1.0.4"
+description = "Simple, modern and high performance file watching and code reload in python."
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "watchfiles-1.0.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ba5bb3073d9db37c64520681dd2650f8bd40902d991e7b4cfaeece3e32561d08"},
+ {file = "watchfiles-1.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f25d0ba0fe2b6d2c921cf587b2bf4c451860086534f40c384329fb96e2044d1"},
+ {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47eb32ef8c729dbc4f4273baece89398a4d4b5d21a1493efea77a17059f4df8a"},
+ {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:076f293100db3b0b634514aa0d294b941daa85fc777f9c698adb1009e5aca0b1"},
+ {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1eacd91daeb5158c598fe22d7ce66d60878b6294a86477a4715154990394c9b3"},
+ {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13c2ce7b72026cfbca120d652f02c7750f33b4c9395d79c9790b27f014c8a5a2"},
+ {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:90192cdc15ab7254caa7765a98132a5a41471cf739513cc9bcf7d2ffcc0ec7b2"},
+ {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278aaa395f405972e9f523bd786ed59dfb61e4b827856be46a42130605fd0899"},
+ {file = "watchfiles-1.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a462490e75e466edbb9fc4cd679b62187153b3ba804868452ef0577ec958f5ff"},
+ {file = "watchfiles-1.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8d0d0630930f5cd5af929040e0778cf676a46775753e442a3f60511f2409f48f"},
+ {file = "watchfiles-1.0.4-cp310-cp310-win32.whl", hash = "sha256:cc27a65069bcabac4552f34fd2dce923ce3fcde0721a16e4fb1b466d63ec831f"},
+ {file = "watchfiles-1.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:8b1f135238e75d075359cf506b27bf3f4ca12029c47d3e769d8593a2024ce161"},
+ {file = "watchfiles-1.0.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2a9f93f8439639dc244c4d2902abe35b0279102bca7bbcf119af964f51d53c19"},
+ {file = "watchfiles-1.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9eea33ad8c418847dd296e61eb683cae1c63329b6d854aefcd412e12d94ee235"},
+ {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31f1a379c9dcbb3f09cf6be1b7e83b67c0e9faabed0471556d9438a4a4e14202"},
+ {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab594e75644421ae0a2484554832ca5895f8cab5ab62de30a1a57db460ce06c6"},
+ {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc2eb5d14a8e0d5df7b36288979176fbb39672d45184fc4b1c004d7c3ce29317"},
+ {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f68d8e9d5a321163ddacebe97091000955a1b74cd43724e346056030b0bacee"},
+ {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9ce064e81fe79faa925ff03b9f4c1a98b0bbb4a1b8c1b015afa93030cb21a49"},
+ {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b77d5622ac5cc91d21ae9c2b284b5d5c51085a0bdb7b518dba263d0af006132c"},
+ {file = "watchfiles-1.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1941b4e39de9b38b868a69b911df5e89dc43767feeda667b40ae032522b9b5f1"},
+ {file = "watchfiles-1.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4f8c4998506241dedf59613082d1c18b836e26ef2a4caecad0ec41e2a15e4226"},
+ {file = "watchfiles-1.0.4-cp311-cp311-win32.whl", hash = "sha256:4ebbeca9360c830766b9f0df3640b791be569d988f4be6c06d6fae41f187f105"},
+ {file = "watchfiles-1.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:05d341c71f3d7098920f8551d4df47f7b57ac5b8dad56558064c3431bdfc0b74"},
+ {file = "watchfiles-1.0.4-cp311-cp311-win_arm64.whl", hash = "sha256:32b026a6ab64245b584acf4931fe21842374da82372d5c039cba6bf99ef722f3"},
+ {file = "watchfiles-1.0.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:229e6ec880eca20e0ba2f7e2249c85bae1999d330161f45c78d160832e026ee2"},
+ {file = "watchfiles-1.0.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5717021b199e8353782dce03bd8a8f64438832b84e2885c4a645f9723bf656d9"},
+ {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0799ae68dfa95136dde7c472525700bd48777875a4abb2ee454e3ab18e9fc712"},
+ {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43b168bba889886b62edb0397cab5b6490ffb656ee2fcb22dec8bfeb371a9e12"},
+ {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb2c46e275fbb9f0c92e7654b231543c7bbfa1df07cdc4b99fa73bedfde5c844"},
+ {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:857f5fc3aa027ff5e57047da93f96e908a35fe602d24f5e5d8ce64bf1f2fc733"},
+ {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55ccfd27c497b228581e2838d4386301227fc0cb47f5a12923ec2fe4f97b95af"},
+ {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c11ea22304d17d4385067588123658e9f23159225a27b983f343fcffc3e796a"},
+ {file = "watchfiles-1.0.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:74cb3ca19a740be4caa18f238298b9d472c850f7b2ed89f396c00a4c97e2d9ff"},
+ {file = "watchfiles-1.0.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c7cce76c138a91e720d1df54014a047e680b652336e1b73b8e3ff3158e05061e"},
+ {file = "watchfiles-1.0.4-cp312-cp312-win32.whl", hash = "sha256:b045c800d55bc7e2cadd47f45a97c7b29f70f08a7c2fa13241905010a5493f94"},
+ {file = "watchfiles-1.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:c2acfa49dd0ad0bf2a9c0bb9a985af02e89345a7189be1efc6baa085e0f72d7c"},
+ {file = "watchfiles-1.0.4-cp312-cp312-win_arm64.whl", hash = "sha256:22bb55a7c9e564e763ea06c7acea24fc5d2ee5dfc5dafc5cfbedfe58505e9f90"},
+ {file = "watchfiles-1.0.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:8012bd820c380c3d3db8435e8cf7592260257b378b649154a7948a663b5f84e9"},
+ {file = "watchfiles-1.0.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa216f87594f951c17511efe5912808dfcc4befa464ab17c98d387830ce07b60"},
+ {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c9953cf85529c05b24705639ffa390f78c26449e15ec34d5339e8108c7c407"},
+ {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7cf684aa9bba4cd95ecb62c822a56de54e3ae0598c1a7f2065d51e24637a3c5d"},
+ {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f44a39aee3cbb9b825285ff979ab887a25c5d336e5ec3574f1506a4671556a8d"},
+ {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38320582736922be8c865d46520c043bff350956dfc9fbaee3b2df4e1740a4b"},
+ {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39f4914548b818540ef21fd22447a63e7be6e24b43a70f7642d21f1e73371590"},
+ {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f12969a3765909cf5dc1e50b2436eb2c0e676a3c75773ab8cc3aa6175c16e902"},
+ {file = "watchfiles-1.0.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:0986902677a1a5e6212d0c49b319aad9cc48da4bd967f86a11bde96ad9676ca1"},
+ {file = "watchfiles-1.0.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:308ac265c56f936636e3b0e3f59e059a40003c655228c131e1ad439957592303"},
+ {file = "watchfiles-1.0.4-cp313-cp313-win32.whl", hash = "sha256:aee397456a29b492c20fda2d8961e1ffb266223625346ace14e4b6d861ba9c80"},
+ {file = "watchfiles-1.0.4-cp313-cp313-win_amd64.whl", hash = "sha256:d6097538b0ae5c1b88c3b55afa245a66793a8fec7ada6755322e465fb1a0e8cc"},
+ {file = "watchfiles-1.0.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:d3452c1ec703aa1c61e15dfe9d482543e4145e7c45a6b8566978fbb044265a21"},
+ {file = "watchfiles-1.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7b75fee5a16826cf5c46fe1c63116e4a156924d668c38b013e6276f2582230f0"},
+ {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e997802d78cdb02623b5941830ab06f8860038faf344f0d288d325cc9c5d2ff"},
+ {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0611d244ce94d83f5b9aff441ad196c6e21b55f77f3c47608dcf651efe54c4a"},
+ {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9745a4210b59e218ce64c91deb599ae8775c8a9da4e95fb2ee6fe745fc87d01a"},
+ {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4810ea2ae622add560f4aa50c92fef975e475f7ac4900ce5ff5547b2434642d8"},
+ {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:740d103cd01458f22462dedeb5a3382b7f2c57d07ff033fbc9465919e5e1d0f3"},
+ {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdbd912a61543a36aef85e34f212e5d2486e7c53ebfdb70d1e0b060cc50dd0bf"},
+ {file = "watchfiles-1.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0bc80d91ddaf95f70258cf78c471246846c1986bcc5fd33ccc4a1a67fcb40f9a"},
+ {file = "watchfiles-1.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ab0311bb2ffcd9f74b6c9de2dda1612c13c84b996d032cd74799adb656af4e8b"},
+ {file = "watchfiles-1.0.4-cp39-cp39-win32.whl", hash = "sha256:02a526ee5b5a09e8168314c905fc545c9bc46509896ed282aeb5a8ba9bd6ca27"},
+ {file = "watchfiles-1.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:a5ae5706058b27c74bac987d615105da17724172d5aaacc6c362a40599b6de43"},
+ {file = "watchfiles-1.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdcc92daeae268de1acf5b7befcd6cfffd9a047098199056c72e4623f531de18"},
+ {file = "watchfiles-1.0.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8d3d9203705b5797f0af7e7e5baa17c8588030aaadb7f6a86107b7247303817"},
+ {file = "watchfiles-1.0.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdef5a1be32d0b07dcea3318a0be95d42c98ece24177820226b56276e06b63b0"},
+ {file = "watchfiles-1.0.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:342622287b5604ddf0ed2d085f3a589099c9ae8b7331df3ae9845571586c4f3d"},
+ {file = "watchfiles-1.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9fe37a2de80aa785d340f2980276b17ef697ab8db6019b07ee4fd28a8359d2f3"},
+ {file = "watchfiles-1.0.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9d1ef56b56ed7e8f312c934436dea93bfa3e7368adfcf3df4c0da6d4de959a1e"},
+ {file = "watchfiles-1.0.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b42cac65beae3a362629950c444077d1b44f1790ea2772beaea95451c086bb"},
+ {file = "watchfiles-1.0.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e0227b8ed9074c6172cf55d85b5670199c99ab11fd27d2c473aa30aec67ee42"},
+ {file = "watchfiles-1.0.4.tar.gz", hash = "sha256:6ba473efd11062d73e4f00c2b730255f9c1bdd73cd5f9fe5b5da8dbd4a717205"},
+]
+
+[package.dependencies]
+anyio = ">=3.0.0"
+
[[package]]
name = "websocket-client"
version = "1.8.0"
@@ -2246,7 +2837,7 @@ version = "14.1"
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
optional = false
python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "docs"]
files = [
{file = "websockets-14.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a0adf84bc2e7c86e8a202537b4fd50e6f7f0e4a6b6bf64d7ccb96c4cd3330b29"},
{file = "websockets-14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90b5d9dfbb6d07a84ed3e696012610b6da074d97453bd01e0e30744b472c8179"},
@@ -2375,4 +2966,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.13"
-content-hash = "99800b85fc1678ba4eca510a3c01ba273f229644b08c711a2e466845794abf38"
+content-hash = "462c7c5f9d1fbae895d6299ba0b690b6e24d0655a4c9fc79f75ddef4eec222f8"
diff --git a/pyproject.toml b/pyproject.toml
index 0cd5f8e..ccfcae6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,15 @@ dependencies = [
pytest = "^8.3.4"
autopep8 = "^2.3.1"

+[tool.poetry.group.docs.dependencies]
+sphinx = "^8.1.3"
+sphinx-autoapi = "^3.4.0"
+sphinxcontrib-mermaid = "^1.0.0"
+sphinx-autobuild = "^2024.10.3"
+sphinx-copybutton = "^0.5.2"
+myst-parser = "^4.0.0"
+furo = "^2024.8.6"
+
[project.scripts]
auto-archiver = "auto_archiver.__main__:main"
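
With the `docs` dependency group above installed (e.g. via `poetry install --only docs`), the documentation can be built locally. A minimal sketch using Sphinx's Python entry point; the `docs/source` and `docs/_build/html` paths are assumptions about the repository layout, not taken from this hunk:

    # Build the HTML docs programmatically; equivalent to
    # `sphinx-build -b html docs/source docs/_build/html`.
    from sphinx.cmd.build import build_main

    exit_code = build_main(["-b", "html", "docs/source", "docs/_build/html"])
    raise SystemExit(exit_code)

sphinx-autobuild from the same group provides a live-reloading alternative while writing docs.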
diff --git a/src/__init__.py b/src/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/auto_archiver/__main__.py b/src/auto_archiver/__main__.py
index b33bc14..1254ec4 100644
--- a/src/auto_archiver/__main__.py
+++ b/src/auto_archiver/__main__.py
@@ -1,3 +1,4 @@
+""" Entry point for the auto_archiver package. """
from . import Config
from . import ArchivingOrchestrator
diff --git a/src/auto_archiver/archivers/__init__.py b/src/auto_archiver/archivers/__init__.py
index dc28269..5733290 100644
--- a/src/auto_archiver/archivers/__init__.py
+++ b/src/auto_archiver/archivers/__init__.py
@@ -1,3 +1,10 @@
+"""
+Archivers are responsible for retrieving the content from various external platforms.
+They act as specialized modules, each tailored to interact with a specific platform,
+service, or data source. The archivers collectively enable the tool to comprehensively
+collect and preserve a variety of content types, such as posts, images, videos and metadata.
+
+"""
from .archiver import Archiver
from .telethon_archiver import TelethonArchiver
from .twitter_api_archiver import TwitterApiArchiver
@@ -6,4 +13,4 @@ from .instagram_tbot_archiver import InstagramTbotArchiver
from .telegram_archiver import TelegramArchiver
from .vk_archiver import VkArchiver
from .generic_archiver.generic_archiver import GenericArchiver as YoutubeDLArchiver
-from .instagram_api_archiver import InstagramAPIArchiver
\ No newline at end of file
+from .instagram_api_archiver import InstagramAPIArchiver
diff --git a/src/auto_archiver/archivers/archiver.py b/src/auto_archiver/archivers/archiver.py
index 911389a..7ec699e 100644
--- a/src/auto_archiver/archivers/archiver.py
+++ b/src/auto_archiver/archivers/archiver.py
@@ -1,3 +1,10 @@
+""" The `archiver` module defines the base functionality for implementing archivers in the media archiving framework.
+    It provides common utility methods, a standard interface for archivers, and a factory
+    method to initialize an archiver instance based on its name.
+"""
from __future__ import annotations
from pathlib import Path
from abc import abstractmethod
@@ -13,6 +20,11 @@ from ..core import Metadata, Step, ArchivingContext
@dataclass
class Archiver(Step):
+ """
+ Base class for implementing archivers in the media archiving framework.
+ Subclasses must implement the `download` method to define platform-specific behavior.
+ """
+
name = "archiver"
def __init__(self, config: dict) -> None:
@@ -87,4 +99,5 @@ class Archiver(Step):
logger.warning(f"Failed to fetch the Media URL: {e}")
@abstractmethod
- def download(self, item: Metadata) -> Metadata: pass
+ def download(self, item: Metadata) -> Metadata:
+ pass
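
A minimal sketch of a platform archiver built on the interface documented above; the class name and the body of `download` are hypothetical, while the config-dict constructor, the abstract `download(item)` method and `Metadata.get_url()` come from this change set:

    from auto_archiver.archivers import Archiver
    from auto_archiver.core import Metadata

    class ExampleArchiver(Archiver):      # hypothetical subclass, for illustration only
        name = "example_archiver"         # the name used to select this step from configuration

        def download(self, item: Metadata) -> Metadata:
            url = item.get_url()          # the URL the orchestrator asked this archiver to fetch
            # ... fetch `url`, attach the resulting Media objects to `item` ...
            return item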
diff --git a/src/auto_archiver/archivers/instagram_api_archiver.py b/src/auto_archiver/archivers/instagram_api_archiver.py
index d8acfd2..d0e7e87 100644
--- a/src/auto_archiver/archivers/instagram_api_archiver.py
+++ b/src/auto_archiver/archivers/instagram_api_archiver.py
@@ -1,4 +1,15 @@
-import re, requests
+"""
+The `instagram_api_archiver` module provides tools for archiving various types of Instagram content
+using the [Instagrapi API](https://github.com/subzeroid/instagrapi).
+
+Connects to an Instagrapi API deployment and allows for downloading Instagram user profiles,
+posts, stories, highlights, and tagged content. It offers advanced configuration options for filtering
+data, reducing JSON output size, and handling large profiles.
+
+"""
+
+import re
+import requests
from datetime import datetime
from loguru import logger
from retrying import retry
diff --git a/src/auto_archiver/archivers/instagram_archiver.py b/src/auto_archiver/archivers/instagram_archiver.py
index 97dd172..94a8fc0 100644
--- a/src/auto_archiver/archivers/instagram_archiver.py
+++ b/src/auto_archiver/archivers/instagram_archiver.py
@@ -1,3 +1,8 @@
+""" Uses the Instaloader library to download content from Instagram. This class handles both individual posts
+ and user profiles, downloading as much information as possible, including images, videos, text, stories,
+ highlights, and tagged posts. Authentication is required via username/password or a session file.
+
+"""
import re, os, shutil, traceback
import instaloader # https://instaloader.github.io/as-module.html
from loguru import logger
diff --git a/src/auto_archiver/archivers/instagram_tbot_archiver.py b/src/auto_archiver/archivers/instagram_tbot_archiver.py
index 0acc08b..01b1614 100644
--- a/src/auto_archiver/archivers/instagram_tbot_archiver.py
+++ b/src/auto_archiver/archivers/instagram_tbot_archiver.py
@@ -1,3 +1,11 @@
+"""
+InstagramTbotArchiver Module
+
+This module provides functionality to archive Instagram content (posts, stories, etc.) using a Telegram bot (`instagram_load_bot`).
+It interacts with the Telegram API via the Telethon library to send Instagram URLs to the bot, which retrieves the
+relevant media and metadata. The fetched content is saved as `Media` objects in a temporary directory and returned as a
+`Metadata` object.
+"""
import shutil
from telethon.sync import TelegramClient
diff --git a/src/auto_archiver/core/__init__.py b/src/auto_archiver/core/__init__.py
index 99765c7..b78df83 100644
--- a/src/auto_archiver/core/__init__.py
+++ b/src/auto_archiver/core/__init__.py
@@ -1,3 +1,6 @@
+""" Core modules to handle things such as orchestration, metadata and configs.
+
+"""
from .metadata import Metadata
from .media import Media
from .step import Step
diff --git a/src/auto_archiver/core/config.py b/src/auto_archiver/core/config.py
index 380319c..c6a2209 100644
--- a/src/auto_archiver/core/config.py
+++ b/src/auto_archiver/core/config.py
@@ -1,3 +1,10 @@
+"""
+The Config class initializes and parses configurations for all other steps.
+It supports CLI argument parsing, loading from a YAML file, and overrides to allow
+flexible setup in various environments.
+
+"""
+
import importlib
import argparse
import yaml
@@ -56,6 +63,7 @@ class Config:
parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
parser.add_argument('--version', action='version', version=importlib.metadata.version('auto_archiver'))
+ # Iterate over all step subclasses to gather default configs and CLI arguments
for configurable in self.configurable_parents:
child: Step
for child in configurable.__subclasses__():
diff --git a/src/auto_archiver/core/context.py b/src/auto_archiver/core/context.py
index 8fdf040..9a21b5c 100644
--- a/src/auto_archiver/core/context.py
+++ b/src/auto_archiver/core/context.py
@@ -1,6 +1,21 @@
+""" ArchivingContext provides a global context for managing configurations and temporary data during the archiving process.
+
+This singleton class allows for:
+- Storing and retrieving key-value pairs that are accessible throughout the application lifecycle.
+- Marking certain values to persist across resets using `keep_on_reset`.
+- Managing temporary directories and other shared data used during the archiving process.
+
+### Key Features:
+- Creates a single global instance.
+- Reset functionality allows for clearing configurations, with options for partial or full resets.
+- Custom getters and setters for commonly used context values like temporary directories.
+
+"""
+
class ArchivingContext:
"""
- Singleton context class.
+ Singleton context class for managing global configurations and temporary data.
+
ArchivingContext._get_instance() to retrieve it if needed
otherwise just
ArchivingContext.set(key, value)
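
A short usage sketch of the singleton context described above. `ArchivingContext.set(key, value)` is taken from the docstring; the matching `get` call is an assumption based on the "custom getters and setters" it mentions, and the key name is illustrative:

    from auto_archiver.core import ArchivingContext

    # Store a value once, early in the run; any later step can read it back.
    ArchivingContext.set("tmp_dir", "/tmp/archive-job")
    tmp_dir = ArchivingContext.get("tmp_dir")   # assumed getter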
diff --git a/src/auto_archiver/core/media.py b/src/auto_archiver/core/media.py
index bf9580e..d204a6e 100644
--- a/src/auto_archiver/core/media.py
+++ b/src/auto_archiver/core/media.py
@@ -1,3 +1,7 @@
+"""
+Manages media files and their associated metadata, supporting storage,
+nested media retrieval, and type validation.
+"""
from __future__ import annotations
import os
@@ -18,6 +22,16 @@ from loguru import logger
@dataclass_json # annotation order matters
@dataclass
class Media:
+ """
+ Represents a media file with associated properties and storage details.
+
+ Attributes:
+ - filename: The file path of the media.
+ - key: An optional identifier for the media.
+ - urls: A list of URLs where the media is stored or accessible.
+ - properties: Additional metadata or transformations for the media.
+ - _mimetype: The media's mimetype (e.g., image/jpeg, video/mp4).
+ """
filename: str
key: str = None
urls: List[str] = field(default_factory=list)
@@ -40,8 +54,9 @@ class Media:
s.store(any_media, url, metadata=metadata)
def all_inner_media(self, include_self=False):
- """ Media can be inside media properties, examples include transformations on original media.
- This function returns a generator for all the inner media.
+ """Retrieves all media, including nested media within properties or transformations on original media.
+ This function returns a generator for all the inner media.
+
"""
if include_self: yield self
for prop in self.properties.values():
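
The nesting behaviour of `all_inner_media` can be illustrated with a small sketch; it assumes `properties` is a plain dict that may hold `Media` values directly, as the class docstring above suggests, and the property key is arbitrary:

    from auto_archiver.core import Media

    screenshot = Media(filename="page.png")
    screenshot.properties["thumbnail"] = Media(filename="thumb.jpg")   # media nested inside media

    # Yields the screenshot itself plus the nested thumbnail found in its properties.
    for m in screenshot.all_inner_media(include_self=True):
        print(m.filename)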
diff --git a/src/auto_archiver/core/metadata.py b/src/auto_archiver/core/metadata.py
index 0a2ad07..04683dd 100644
--- a/src/auto_archiver/core/metadata.py
+++ b/src/auto_archiver/core/metadata.py
@@ -1,3 +1,13 @@
+"""
+Acts as a container for metadata and media objects associated with an archived item.
+
+Key Functionalities:
+- Store and retrieve metadata and associated media.
+- Merge metadata objects with conflict resolution.
+- Validate properties like URLs and timestamps.
+- Manage and deduplicate media objects.
+- Support for flexible metadata querying and appending.
+"""
from __future__ import annotations
import hashlib
@@ -25,7 +35,11 @@ class Metadata:
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
"""
- merges two Metadata instances, will overwrite according to overwrite_left flag
+ Merges another `Metadata` instance into this one.
+
+ Conflicts are resolved based on the `overwrite_left` flag:
+ - If `True`, this instance's values are overwritten by `right`.
+ - If `False`, the inverse applies.
"""
if not right: return self
if overwrite_left:
@@ -191,4 +205,4 @@ class Metadata:
for r in results[1:]:
if len(r.media) > len(most_complete.media): most_complete = r
elif len(r.media) == len(most_complete.media) and len(r.metadata) > len(most_complete.metadata): most_complete = r
- return most_complete
\ No newline at end of file
+ return most_complete
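
A brief sketch of the merge semantics documented above. Only `merge(right, overwrite_left=...)` is taken from the code shown; constructing empty `Metadata` objects and the `set`/`get` accessors are assumptions for illustration:

    from auto_archiver.core import Metadata

    left, right = Metadata(), Metadata()
    left.set("title", "from left")        # assumed setter, illustrative key
    right.set("title", "from right")

    merged = left.merge(right)            # overwrite_left=True: right's values win
    print(merged.get("title"))            # expected: "from right"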
diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py
index 0594dde..3290070 100644
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -1,3 +1,9 @@
+""" Orchestrates all archiving steps, including feeding items,
+ archiving them with specific archivers, enrichment, storage,
+    formatting, database operations and cleanup.
+
+"""
+
from __future__ import annotations
from typing import Generator, Union, List
from urllib.parse import urlparse
diff --git a/src/auto_archiver/core/step.py b/src/auto_archiver/core/step.py
index 3917a89..9f294fe 100644
--- a/src/auto_archiver/core/step.py
+++ b/src/auto_archiver/core/step.py
@@ -1,3 +1,9 @@
+"""
+Defines the Step abstract base class, which acts as a blueprint for steps in the archiving pipeline
+by handling user configuration, validating the step's properties, and implementing dynamic instantiation.
+
+"""
+
from __future__ import annotations
from dataclasses import dataclass
from inspect import ClassFoundException
@@ -10,6 +16,7 @@ class Step(ABC):
name: str = None
def __init__(self, config: dict) -> None:
+ # Initialises each step by reading the relevant entries
# reads the configs into object properties
# self.config = config[self.name]
for k, v in config.get(self.name, {}).items():
@@ -20,7 +27,9 @@ class Step(ABC):
def init(name: str, config: dict, child: Type[Step]) -> Step:
"""
- looks into direct subclasses of child for name and returns such an object
+ Attempts to instantiate a subclass of the provided `child` type
+ matching the given `name`.
+ Raises ClassFoundException if no matching subclass is found.
TODO: cannot find subclasses of child.subclasses
"""
for sub in child.__subclasses__():
@@ -30,7 +39,9 @@ class Step(ABC):
def assert_valid_string(self, prop: str) -> None:
"""
- receives a property name an ensures it exists and is a valid non-empty string, raises an exception if not
+ Receives a property name and ensures it exists and is a valid non-empty string,
+ raising an AssertionError if not.
+ TODO: replace assertions with custom exceptions
"""
assert hasattr(self, prop), f"property {prop} not found"
s = getattr(self, prop)
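
The factory behaviour documented for `Step.init` follows a common subclass-lookup pattern. A generic, self-contained sketch of that pattern (class names are illustrative and not the project's own):

    class Base:
        name: str = None

        @staticmethod
        def init(name: str, config: dict, child: type) -> "Base":
            # Search the direct subclasses of `child` for one whose `name` matches.
            for sub in child.__subclasses__():
                if sub.name == name:
                    return sub(config)
            raise LookupError(f"no {child.__name__} subclass named {name!r}")

    class GreetingStep(Base):
        name = "greeting"

        def __init__(self, config: dict):
            self.config = config

    step = Base.init("greeting", {"greeting": {"message": "hi"}}, Base)   # -> GreetingStep instance

As the TODO in the docstring notes, this approach only sees direct subclasses, not deeper descendants.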
diff --git a/src/auto_archiver/databases/__init__.py b/src/auto_archiver/databases/__init__.py
index df48f39..4c73896 100644
--- a/src/auto_archiver/databases/__init__.py
+++ b/src/auto_archiver/databases/__init__.py
@@ -1,3 +1,7 @@
+""" Databases are used to store the outputs from running the Auto Archiver.
+"""
from .database import Database
from .gsheet_db import GsheetsDb
from .console_db import ConsoleDb
diff --git a/src/auto_archiver/databases/api_db.py b/src/auto_archiver/databases/api_db.py
index 233e2a9..4304855 100644
--- a/src/auto_archiver/databases/api_db.py
+++ b/src/auto_archiver/databases/api_db.py
@@ -32,7 +32,9 @@ class AAApiDb(Database):
"tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
}
def fetch(self, item: Metadata) -> Union[Metadata, bool]:
- """ query the database for the existence of this item"""
+ """ query the database for the existence of this item.
+ Helps avoid re-archiving the same URL multiple times.
+ """
if not self.allow_rearchive: return
params = {"url": item.get_url(), "limit": 15}
diff --git a/src/auto_archiver/enrichers/__init__.py b/src/auto_archiver/enrichers/__init__.py
index 5681403..64ce248 100644
--- a/src/auto_archiver/enrichers/__init__.py
+++ b/src/auto_archiver/enrichers/__init__.py
@@ -1,3 +1,15 @@
+"""
+Enrichers are modular components that enhance archived content by adding
+context, metadata, or additional processing.
+
+These add additional information to the context, such as screenshots, hashes, and metadata.
+They are designed to work within the archiving pipeline, operating on `Metadata` objects after
+the archiving step and before storage or formatting.
+
+Enrichers are optional, but they make the archived data significantly richer and more useful.
+"""
from .enricher import Enricher
from .screenshot_enricher import ScreenshotEnricher
from .wayback_enricher import WaybackArchiverEnricher
diff --git a/src/auto_archiver/enrichers/enricher.py b/src/auto_archiver/enrichers/enricher.py
index 4948d57..f195f23 100644
--- a/src/auto_archiver/enrichers/enricher.py
+++ b/src/auto_archiver/enrichers/enricher.py
@@ -1,3 +1,5 @@
+""" Base classes and utilities for enrichers in the Auto-Archiver system.
+"""
from __future__ import annotations
from dataclasses import dataclass
from abc import abstractmethod, ABC
diff --git a/src/auto_archiver/enrichers/hash_enricher.py b/src/auto_archiver/enrichers/hash_enricher.py
index 1270149..69973b7 100644
--- a/src/auto_archiver/enrichers/hash_enricher.py
+++ b/src/auto_archiver/enrichers/hash_enricher.py
@@ -1,3 +1,12 @@
+""" Hash Enricher for generating cryptographic hashes of media files.
+
+The `HashEnricher` calculates cryptographic hashes (e.g., SHA-256, SHA3-512)
+for media files stored in `Metadata` objects. These hashes are used for
+validating content integrity, ensuring data authenticity, and identifying
+exact duplicates. The hash is computed by reading the file's bytes in chunks,
+making it suitable for handling large files efficiently.
+
+"""
import hashlib
from loguru import logger
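
The chunked hashing strategy described above can be sketched with plain `hashlib`, independent of the enricher's own API; the function name and chunk size are illustrative:

    import hashlib

    def chunked_sha256(path: str, chunk_size: int = 16 * 1024 * 1024) -> str:
        # Read the file in fixed-size chunks so large media never has to fit in memory at once.
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            while chunk := f.read(chunk_size):
                digest.update(chunk)
        return digest.hexdigest()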
diff --git a/src/auto_archiver/enrichers/pdq_hash_enricher.py b/src/auto_archiver/enrichers/pdq_hash_enricher.py
index ff88bab..36f793d 100644
--- a/src/auto_archiver/enrichers/pdq_hash_enricher.py
+++ b/src/auto_archiver/enrichers/pdq_hash_enricher.py
@@ -1,3 +1,15 @@
+"""
+PDQ Hash Enricher for generating perceptual hashes of media files.
+
+The `PdqHashEnricher` processes media files (e.g., images) in `Metadata`
+objects and calculates perceptual hashes using the PDQ hashing algorithm.
+These hashes are designed specifically for images and can be used
+for detecting duplicate or near-duplicate visual content.
+
+This enricher is typically used after thumbnail or screenshot enrichers
+to ensure images are available for hashing.
+
+"""
import traceback
import pdqhash
import numpy as np
diff --git a/src/auto_archiver/enrichers/thumbnail_enricher.py b/src/auto_archiver/enrichers/thumbnail_enricher.py
index 0ffbe38..5d8bee2 100644
--- a/src/auto_archiver/enrichers/thumbnail_enricher.py
+++ b/src/auto_archiver/enrichers/thumbnail_enricher.py
@@ -1,3 +1,11 @@
+"""Thumbnail Enricher for generating visual previews of video files.
+
+The `ThumbnailEnricher` processes video files in `Metadata` objects and
+creates evenly distributed thumbnail images. These thumbnails provide
+visual snapshots of the video's keyframes, helping users preview content
+and identify important moments without watching the entire video.
+
+"""
import ffmpeg, os
from loguru import logger
diff --git a/src/auto_archiver/feeders/__init__.py b/src/auto_archiver/feeders/__init__.py
index 84a8495..8117672 100644
--- a/src/auto_archiver/feeders/__init__.py
+++ b/src/auto_archiver/feeders/__init__.py
@@ -1,3 +1,6 @@
+""" Feeders handle the input of items (e.g. URLs to be archived) into the Auto Archiver.
+
+"""
from.feeder import Feeder
from .gsheet_feeder import GsheetsFeeder
from .cli_feeder import CLIFeeder
diff --git a/src/auto_archiver/feeders/gsheet_feeder.py b/src/auto_archiver/feeders/gsheet_feeder.py
index 19142f4..1c4fc32 100644
--- a/src/auto_archiver/feeders/gsheet_feeder.py
+++ b/src/auto_archiver/feeders/gsheet_feeder.py
@@ -1,3 +1,13 @@
+"""
+GsheetsFeeder: A Google Sheets-based feeder for the Auto Archiver.
+
+This reads data from Google Sheets and filters rows based on user-defined rules.
+The filtered rows are processed into `Metadata` objects.
+
+### Key properties
+- Validates the sheet's structure and filters rows based on input configurations.
+- Ensures only rows with valid URLs and unprocessed statuses are included.
+"""
import gspread, os
from loguru import logger
diff --git a/src/auto_archiver/formatters/__init__.py b/src/auto_archiver/formatters/__init__.py
index ce8afac..af96f15 100644
--- a/src/auto_archiver/formatters/__init__.py
+++ b/src/auto_archiver/formatters/__init__.py
@@ -1,3 +1,4 @@
+""" Formatters render the archived content and metadata into their final output format, such as HTML. """
from .formatter import Formatter
from .html_formatter import HtmlFormatter
from .mute_formatter import MuteFormatter
\ No newline at end of file
diff --git a/src/auto_archiver/storages/__init__.py b/src/auto_archiver/storages/__init__.py
index 5f768a6..bff83e6 100644
--- a/src/auto_archiver/storages/__init__.py
+++ b/src/auto_archiver/storages/__init__.py
@@ -1,3 +1,6 @@
+""" This module contains the storage classes for the auto-archiver.
+
+"""
from .storage import Storage
from .s3 import S3Storage
from .local import LocalStorage
diff --git a/src/auto_archiver/utils/__init__.py b/src/auto_archiver/utils/__init__.py
index 36ce765..788f159 100644
--- a/src/auto_archiver/utils/__init__.py
+++ b/src/auto_archiver/utils/__init__.py
@@ -1,3 +1,4 @@
+""" Auto Archiver Utilities. """
# we need to explicitly expose the available imports here
from .gworksheet import GWorksheet
from .misc import *
diff --git a/src/auto_archiver/utils/webdriver.py b/src/auto_archiver/utils/webdriver.py
index 7e95330..cf84c35 100644
--- a/src/auto_archiver/utils/webdriver.py
+++ b/src/auto_archiver/utils/webdriver.py
@@ -1,3 +1,4 @@
+""" This Webdriver class acts as a context manager for the selenium webdriver. """
from __future__ import annotations
from selenium import webdriver
from selenium.common.exceptions import TimeoutException