Mirror of https://github.com/bellingcat/auto-archiver

Update manifests and modules

parent ba4b330881
commit aa7ca93a43
@@ -0,0 +1 @@
+from api_db import AAApiDb

@@ -15,7 +15,9 @@
 "group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
 "allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
 "store_results": {"default": True, "help": "when set, will send the results to the API database."},
-"tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
+"tags": {"default": [], "help": "what tags to add to the archived URL",
+"type": lambda val: set(val.split(",")),
+}
 },
 "description": """
 Provides integration with the Auto-Archiver API for querying and storing archival data.
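
The change above swaps the two-argument `cli_set` reducer for a one-argument `type` parser. A minimal sketch of how the two styles behave, assuming the config loader simply calls the lambda on the raw CLI string (the loader wiring here is an assumption; only the lambdas come from the manifest):

    # old style: a reducer over (raw CLI value, current value)
    old_cli_set = lambda cli_val, cur_val: set(cli_val.split(","))
    # new style: a plain one-argument parser, argparse-style
    new_type = lambda val: set(val.split(","))

    raw = "tag1,tag2,tag3"
    assert old_cli_set(raw, set()) == {"tag1", "tag2", "tag3"}
    assert new_type(raw) == {"tag1", "tag2", "tag3"}

The same cli_set-to-type substitution recurs throughout the manifests below.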
@@ -2,7 +2,7 @@ from typing import Union
 import requests, os
 from loguru import logger
 
-from auto_archiver.base_modules import Database
+from auto_archiver.base_processors import Database
 from auto_archiver.core import Metadata
 
 

@@ -0,0 +1 @@
+from .atlos import AtlosStorage

@@ -15,12 +15,12 @@
 "api_token": {
 "default": None,
 "help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
-"cli_set": lambda cli_val, _: cli_val,
+"type": str,
 },
 "atlos_url": {
 "default": "https://platform.atlos.org",
 "help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
-"cli_set": lambda cli_val, _: cli_val,
+"type": str,
 },
 },
 "description": """

@@ -5,7 +5,7 @@ import requests
 import hashlib
 
 from auto_archiver.core import Media, Metadata
-from auto_archiver.base_modules import Storage
+from auto_archiver.base_processors import Storage
 from auto_archiver.utils import get_atlos_config_options
 
 

@@ -0,0 +1 @@
+from atlos_db import AtlosDb

@@ -11,12 +11,11 @@
 "api_token": {
 "default": None,
 "help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
-"cli_set": lambda cli_val, _: cli_val
 },
 "atlos_url": {
 "default": "https://platform.atlos.org",
 "help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
-"cli_set": lambda cli_val, _: cli_val
+"type": str
 },
 },
 "description": """

@@ -6,7 +6,7 @@ from csv import DictWriter
 from dataclasses import asdict
 import requests
 
-from auto_archiver.base_modules import Database
+from auto_archiver.base_processors import Database
 from auto_archiver.core import Metadata
 from auto_archiver.utils import get_atlos_config_options
 
@@ -3,11 +3,11 @@ def get_atlos_config_options():
 "api_token": {
 "default": None,
 "help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
-"cli_set": lambda cli_val, _: cli_val
+"type": str
 },
 "atlos_url": {
 "default": "https://platform.atlos.org",
 "help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
-"cli_set": lambda cli_val, _: cli_val
+"type": str
 },
 }
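
get_atlos_config_options() returns the api_token and atlos_url options shared by the Atlos feeder, database and storage modules, which import it above. A hedged sketch of how a manifest could reuse it (the dict merge below is an assumption for illustration; the Atlos manifests in this commit still list the options inline):

    from auto_archiver.utils import get_atlos_config_options

    # hypothetical manifest fragment reusing the shared Atlos options
    configs = {
        **get_atlos_config_options(),  # contributes "api_token" and "atlos_url"
        "some_module_option": {"default": None, "help": "illustrative, not a real option"},
    }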
@@ -0,0 +1 @@
+from .atlos_feeder import AtlosFeeder

@@ -9,12 +9,12 @@
 "api_token": {
 "default": None,
 "help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
-"cli_set": lambda cli_val, _: cli_val
+"type": str
 },
 "atlos_url": {
 "default": "https://platform.atlos.org",
 "help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
-"cli_set": lambda cli_val, _: cli_val
+"type": str
 },
 },
 "description": """

@@ -1,7 +1,7 @@
 from loguru import logger
 import requests
 
-from auto_archiver.base_modules import Feeder
+from auto_archiver.base_processors import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 from auto_archiver.utils import get_atlos_config_options
 

@@ -0,0 +1 @@
+from .cli_feeder import CLIFeeder

@@ -9,7 +9,7 @@
 "urls": {
 "default": None,
 "help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
-"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
+"type": lambda val: set(val.split(",")),
 },
 },
 "description": """

@@ -1,6 +1,6 @@
 from loguru import logger
 
-from auto_archiver.base_modules import Feeder
+from auto_archiver.base_processors import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 
 

@@ -0,0 +1 @@
+from .console_db import ConsoleDb

@@ -1,6 +1,6 @@
 from loguru import logger
 
-from auto_archiver.base_modules import Database
+from auto_archiver.base_processors import Database
 from auto_archiver.core import Metadata
 
 

@@ -0,0 +1 @@
+from .csv_db import CSVDb

@@ -3,7 +3,7 @@ from loguru import logger
 from csv import DictWriter
 from dataclasses import asdict
 
-from auto_archiver.base_modules import Database
+from auto_archiver.base_processors import Database
 from auto_archiver.core import Metadata
 
 

@@ -0,0 +1 @@
+from .csv_feeder import CSVFeeder

@@ -11,7 +11,7 @@
 "default": None,
 "help": "Path to the input file(s) to read the URLs from, comma separated. \
 Input files should be formatted with one URL per line",
-"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
+"type": lambda val: set(val.split(",")),
 },
 "column": {
 "default": None,

@@ -1,7 +1,7 @@
 from loguru import logger
 import csv
 
-from auto_archiver.base_modules import Feeder
+from auto_archiver.base_processors import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 from auto_archiver.utils import url_or_none
 

@@ -17,7 +17,7 @@ class CSVFeeder(Feeder):
 "default": None,
 "help": "Path to the input file(s) to read the URLs from, comma separated. \
 Input files should be formatted with one URL per line",
-"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
+"type": lambda val: set(val.split(",")),
 },
 "column": {
 "default": None,

@@ -0,0 +1 @@
+from .gdrive_storage import GDriveStorage

@@ -1,4 +1,4 @@
-m = {
+{
 "name": "Google Drive Storage",
 "type": ["storage"],
 "requires_setup": True,
@@ -12,15 +12,16 @@ m = {
 ],
 },
 "configs": {
-"path_generator": {
-"default": "url",
-"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
-},
-"filename_generator": {
-"default": "random",
-"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
-},
-# TODO: get base storage configs
+"path_generator": {
+"default": "url",
+"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
+"choices": ["flat", "url", "random"],
+},
+"filename_generator": {
+"default": "random",
+"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
+"choices": ["random", "static"],
+},
 "root_folder_id": {"default": None, "help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'"},
 "oauth_token": {"default": None, "help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account."},
 "service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path, same as used for Google Sheets. NOTE: storage used will count towards the developer account."},

@@ -10,7 +10,7 @@ from google.oauth2.credentials import Credentials
 from google.auth.transport.requests import Request
 
 from auto_archiver.core import Media
-from auto_archiver.base_modules import Storage
+from auto_archiver.base_processors import Storage
 
 
 class GDriveStorage(Storage):

@@ -1,6 +1,6 @@
 from loguru import logger
 
-from auto_archiver.base_modules.extractor import Extractor
+from auto_archiver.base_processors.extractor import Extractor
 from auto_archiver.core.metadata import Metadata, Media
 from .dropin import GenericDropin, InfoExtractor
 

@@ -1,6 +1,6 @@
 from yt_dlp.extractor.common import InfoExtractor
 from auto_archiver.core.metadata import Metadata
-from auto_archiver.base_modules.extractor import Extractor
+from auto_archiver.base_processors.extractor import Extractor
 
 class GenericDropin:
 """Base class for dropins for the generic extractor.

@@ -5,7 +5,7 @@ from yt_dlp.extractor.common import InfoExtractor
 
 from loguru import logger
 
-from auto_archiver.base_modules.extractor import Extractor
+from auto_archiver.base_processors.extractor import Extractor
 from ...core import Metadata, Media, ArchivingContext
 
 class GenericExtractor(Extractor):

@@ -2,7 +2,7 @@ from typing import Type
 
 from auto_archiver.utils import traverse_obj
 from auto_archiver.core.metadata import Metadata, Media
-from auto_archiver.base_modules.extractor import Extractor
+from auto_archiver.base_processors.extractor import Extractor
 from yt_dlp.extractor.common import InfoExtractor
 
 from dateutil.parser import parse as parse_dt

@@ -6,7 +6,7 @@ from slugify import slugify
 
 from auto_archiver.core.metadata import Metadata, Media
 from auto_archiver.utils import UrlUtil
-from auto_archiver.base_modules.extractor import Extractor
+from auto_archiver.base_processors.extractor import Extractor
 
 from .dropin import GenericDropin, InfoExtractor
 
@@ -0,0 +1 @@
+from .gsheet_db import GsheetsDb

@@ -0,0 +1,38 @@
+{
+"name": "Google Sheets Database",
+"type": ["database"],
+"requires_setup": True,
+"external_dependencies": {
+"python": ["loguru", "gspread", "python-slugify"],
+},
+"configs": {
+"allow_worksheets": {
+"default": set(),
+"help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
+"type": lambda val: set(val.split(",")),
+},
+"block_worksheets": {
+"default": set(),
+"help": "(CSV) explicitly block some worksheets from being processed",
+"type": lambda val: set(val.split(",")),
+},
+"use_sheet_names_in_stored_paths": {
+"default": True,
+"help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
+}
+},
+"description": """
+GsheetsDatabase:
+Handles integration with Google Sheets for tracking archival tasks.
+
+### Features
+- Updates a Google Sheet with the status of the archived URLs, including in progress, success or failure, and method used.
+- Saves metadata such as title, text, timestamp, hashes, screenshots, and media URLs to designated columns.
+- Formats media-specific metadata, such as thumbnails and PDQ hashes for the sheet.
+- Skips redundant updates for empty or invalid data fields.
+
+### Notes
+- Currently works only with metadata provided by GsheetFeeder.
+- Requires configuration of a linked Google Sheet and appropriate API credentials.
+"""
+}

@@ -5,7 +5,7 @@ from urllib.parse import quote
 
 from loguru import logger
 
-from auto_archiver.base_modules import Database
+from auto_archiver.base_processors import Database
 from auto_archiver.core import Metadata, Media, ArchivingContext
 from auto_archiver.utils import GWorksheet
 

@@ -105,5 +105,4 @@ class GsheetsDb(Database):
 elif self.sheet_id:
 print(self.sheet_id)
 
-
 return gw, row
@@ -0,0 +1 @@
+from .gsheet_feeder import GsheetsFeeder

@@ -9,12 +9,12 @@
 "allow_worksheets": {
 "default": set(),
 "help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
-"cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+"type": lambda val: set(val.split(",")),
 },
 "block_worksheets": {
 "default": set(),
 "help": "(CSV) explicitly block some worksheets from being processed",
-"cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+"type": lambda val: set(val.split(",")),
 },
 "use_sheet_names_in_stored_paths": {
 "default": True,

@@ -22,11 +22,7 @@
 }
 },
 "description": """
-Google Sheets Module.
-
-Handles feeding from a google sheet as well as an optional write back to the sheet.
-
-## GsheetsFeeder
+GsheetsFeeder
 A Google Sheets-based feeder for the Auto Archiver.
 
 This reads data from Google Sheets and filters rows based on user-defined rules.

@@ -41,18 +37,5 @@
 ### Notes
 - Requires a Google Service Account JSON file for authentication. Suggested location is `secrets/gsheets_service_account.json`.
 - Create the sheet using the template provided in the docs.
-
-## GsheetsDatabase:
-Handles integration with Google Sheets for tracking archival tasks.
-
-### Features
-- Updates a Google Sheet with the status of the archived URLs, including in progress, success or failure, and method used.
-- Saves metadata such as title, text, timestamp, hashes, screenshots, and media URLs to designated columns.
-- Formats media-specific metadata, such as thumbnails and PDQ hashes for the sheet.
-- Skips redundant updates for empty or invalid data fields.
-
-### Notes
-- Currently works only with metadata provided by GsheetFeeder.
-- Requires configuration of a linked Google Sheet and appropriate API credentials.
 """
 }
@@ -13,7 +13,7 @@ import gspread, os
 from loguru import logger
 from slugify import slugify
 
-from auto_archiver.base_modules import Feeder
+from auto_archiver.base_processors import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 from auto_archiver.utils import Gsheets, GWorksheet
 
@@ -1 +1 @@
-from hash_enricher import HashEnricher
+from .hash_enricher import HashEnricher
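
The added leading dot makes this a package-relative import: the bare form only resolves if the module's own directory happens to be on sys.path, while the dotted form resolves against the package itself. A minimal sketch (the package path is inferred from the auto_archiver.modules.hash_enricher imports elsewhere in this diff):

    # inside the hash_enricher package's __init__.py
    from .hash_enricher import HashEnricher  # resolved relative to the package

    # callers can then import the class through the package, e.g.:
    # from auto_archiver.modules.hash_enricher import HashEnricher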
@@ -10,7 +10,7 @@ making it suitable for handling large files efficiently.
 import hashlib
 from loguru import logger
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Metadata, ArchivingContext
 
 

@@ -0,0 +1 @@
+from .html_formatter import HtmlFormatter

@@ -1,4 +1,4 @@
-m = {
+{
 "name": "HTML Formatter",
 "type": ["formatter"],
 "requires_setup": False,

@@ -9,7 +9,7 @@ import base64
 
 from auto_archiver.version import __version__
 from auto_archiver.core import Metadata, Media, ArchivingContext
-from auto_archiver.base_modules import Formatter
+from auto_archiver.base_processors import Formatter
 from auto_archiver.modules.hash_enricher import HashEnricher
 from auto_archiver.utils.misc import random_str
 

@@ -0,0 +1 @@
+from .instagram_api_archiver import InstagramAPIExtractor

@@ -16,7 +16,7 @@ from loguru import logger
 from retrying import retry
 from tqdm import tqdm
 
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 from auto_archiver.core import Media
 from auto_archiver.core import Metadata
 

@@ -0,0 +1 @@
+from .instagram_archiver import InstagramExtractor

@@ -7,7 +7,7 @@ import re, os, shutil, traceback
 import instaloader # https://instaloader.github.io/as-module.html
 from loguru import logger
 
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 from auto_archiver.core import Metadata
 from auto_archiver.core import Media
 

@@ -0,0 +1 @@
+from .instagram_tbot_archiver import InstagramTbotExtractor

@@ -15,7 +15,7 @@ from sqlite3 import OperationalError
 from loguru import logger
 from telethon.sync import TelegramClient
 
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 from auto_archiver.core import Metadata, Media, ArchivingContext
 from auto_archiver.utils import random_str
 

@@ -0,0 +1 @@
+from .local import LocalStorage

@@ -1,4 +1,4 @@
-m = {
+{
 "name": "Local Storage",
 "type": ["storage"],
 "requires_setup": False,
@@ -9,10 +9,12 @@ m = {
 "path_generator": {
 "default": "url",
 "help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
+"choices": ["flat", "url", "random"],
 },
 "filename_generator": {
 "default": "random",
 "help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
+"choices": ["random", "static"],
 },
 "save_to": {"default": "./archived", "help": "folder where to save archived content"},
 "save_absolute": {"default": False, "help": "whether the path to the stored file is absolute or relative in the output result inc. formatters (WARN: leaks the file structure)"},
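
The new `choices` keys constrain the accepted values of these options. A short sketch of the argparse-style validation such a constraint implies (how the real config loader enforces it is an assumption):

    # illustrative check only
    option = {"default": "url", "choices": ["flat", "url", "random"]}

    def pick(value, option):
        if value not in option["choices"]:
            raise ValueError(f"{value!r} is not one of {option['choices']}")
        return value

    pick("url", option)       # ok
    # pick("nested", option)  # would raise ValueError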
@@ -5,7 +5,7 @@ import os
 from loguru import logger
 
 from auto_archiver.core import Media
-from auto_archiver.base_modules import Storage
+from auto_archiver.base_processors import Storage
 
 
 class LocalStorage(Storage):

@@ -0,0 +1 @@
+from .meta_enricher import MetaEnricher

@@ -2,7 +2,7 @@ import datetime
 import os
 from loguru import logger
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Metadata
 
 

@@ -0,0 +1 @@
+from .metadata_enricher import MetadataEnricher

@@ -2,7 +2,7 @@ import subprocess
 import traceback
 from loguru import logger
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Metadata
 
 

@@ -0,0 +1 @@
+from .mute_formatter import MuteFormatter

@@ -0,0 +1 @@
+from .pdq_hash_enricher import PdqHashEnricher

@@ -16,7 +16,7 @@ import numpy as np
 from PIL import Image, UnidentifiedImageError
 from loguru import logger
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Metadata
 
 

@@ -0,0 +1 @@
+from .s3 import S3Storage

@@ -1,4 +1,4 @@
-m = {
+{
 "name": "S3 Storage",
 "type": ["storage"],
 "requires_setup": True,
@@ -6,29 +6,31 @@ m = {
 "python": ["boto3", "loguru"],
 },
 "configs": {
-"path_generator": {
-"default": "url",
-"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
-},
-"filename_generator": {
-"default": "random",
-"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
-},
-"bucket": {"default": None, "help": "S3 bucket name"},
-"region": {"default": None, "help": "S3 region name"},
-"key": {"default": None, "help": "S3 API key"},
-"secret": {"default": None, "help": "S3 API secret"},
-"random_no_duplicate": {"default": False, "help": f"if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. Creates a new root folder path `{NO_DUPLICATES_FOLDER}`"},
-"endpoint_url": {
-"default": 'https://{region}.digitaloceanspaces.com',
-"help": "S3 bucket endpoint, {region} are inserted at runtime"
-},
-"cdn_url": {
-"default": 'https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}',
-"help": "S3 CDN url, {bucket}, {region} and {key} are inserted at runtime"
-},
-"private": {"default": False, "help": "if true S3 files will not be readable online"},
-},
+"path_generator": {
+"default": "url",
+"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
+"choices": ["flat", "url", "random"],
+},
+"filename_generator": {
+"default": "random",
+"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
+"choices": ["random", "static"],
+},
+"bucket": {"default": None, "help": "S3 bucket name"},
+"region": {"default": None, "help": "S3 region name"},
+"key": {"default": None, "help": "S3 API key"},
+"secret": {"default": None, "help": "S3 API secret"},
+"random_no_duplicate": {"default": False, "help": f"if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. Creates a new root folder path `{NO_DUPLICATES_FOLDER}`"},
+"endpoint_url": {
+"default": 'https://{region}.digitaloceanspaces.com',
+"help": "S3 bucket endpoint, {region} are inserted at runtime"
+},
+"cdn_url": {
+"default": 'https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}',
+"help": "S3 CDN url, {bucket}, {region} and {key} are inserted at runtime"
+},
+"private": {"default": False, "help": "if true S3 files will not be readable online"},
+},
 "description": """
 S3Storage: A storage module for saving media files to an S3-compatible object storage.
 
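
Per the help strings, `endpoint_url` and `cdn_url` are templates in which `{region}`, `{bucket}` and `{key}` are filled in at runtime. A small sketch with made-up values (plain str.format substitution is assumed):

    endpoint_url = "https://{region}.digitaloceanspaces.com"
    cdn_url = "https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}"

    # made-up bucket, region and key, for illustration only
    print(endpoint_url.format(region="fra1"))
    # https://fra1.digitaloceanspaces.com
    print(cdn_url.format(bucket="my-archive", region="fra1", key="url/abc123/image.jpg"))
    # https://my-archive.fra1.cdn.digitaloceanspaces.com/url/abc123/image.jpg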
@@ -4,7 +4,7 @@ import boto3, os
 
 from auto_archiver.utils.misc import random_str
 from auto_archiver.core import Media
-from auto_archiver.base_modules import Storage
+from auto_archiver.base_processors import Storage
 # TODO
 from auto_archiver.modules.hash_enricher import HashEnricher
 from loguru import logger

@@ -0,0 +1 @@
+from .screenshot_enricher import ScreenshotEnricher

@@ -5,7 +5,7 @@ import base64
 from selenium.common.exceptions import TimeoutException
 
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.utils import Webdriver, UrlUtil, random_str
 from auto_archiver.core import Media, Metadata, ArchivingContext
 

@@ -0,0 +1 @@
+from .ssl_enricher import SSLEnricher

@@ -3,7 +3,7 @@ from slugify import slugify
 from urllib.parse import urlparse
 from loguru import logger
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Metadata, ArchivingContext, Media
 
 

@@ -0,0 +1 @@
+from .telegram_extractor import TelegramExtractor

@@ -2,7 +2,7 @@ import requests, re, html
 from bs4 import BeautifulSoup
 from loguru import logger
 
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 from auto_archiver.core import Metadata, Media
 
 

@@ -0,0 +1 @@
+from .telethon_archiver import TelethonArchiver

@@ -1,4 +1,4 @@
-# TODO rm dependency on json
+import json
 {
 "name": "telethon_extractor",
 "type": ["extractor"],
@@ -19,8 +19,7 @@
 "channel_invites": {
 "default": {},
 "help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup",
-# TODO
-"cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
+"type": lambda x: json.loads(x),
 }
 },
 "description": """
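
`channel_invites` is now parsed with a plain `json.loads`, so the CLI value is a JSON object pairing invite links with channel ids as described in the help text (the exact key/value shape below is an assumption, with made-up values). Unlike the old `cli_set`, which merged the parsed dict into the current value, the new parser simply returns the parsed object:

    import json

    # made-up invite link and channel id, following the format in the help text
    raw = '{"t.me/+EXAMPLEHASH": 1234567890}'
    channel_invites = json.loads(raw)  # what the new "type" lambda produces
    print(channel_invites)             # {'t.me/+EXAMPLEHASH': 1234567890}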
@@ -8,7 +8,7 @@ from loguru import logger
 from tqdm import tqdm
 import re, time, json, os
 
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 from auto_archiver.core import Metadata, Media, ArchivingContext
 from auto_archiver.utils import random_str
 

@@ -0,0 +1 @@
+from .thumbnail_enricher import ThumbnailEnricher

@@ -9,7 +9,7 @@ and identify important moments without watching the entire video.
 import ffmpeg, os
 from loguru import logger
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Media, Metadata, ArchivingContext
 from auto_archiver.utils.misc import random_str
 

@@ -0,0 +1 @@
+from .timestamping_enricher import TimestampingEnricher

@@ -21,7 +21,7 @@
 "http://tss.accv.es:8318/tsa"
 ],
 "help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.",
-"cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+"type": lambda val: set(val.split(",")),
 }
 },
 "description": """

@@ -8,9 +8,9 @@ from certvalidator import CertificateValidator, ValidationContext
 from asn1crypto import pem
 import certifi
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Metadata, ArchivingContext, Media
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 
 
 class TimestampingEnricher(Enricher):

@@ -0,0 +1 @@
+from .twitter_api_archiver import TwitterApiExtractor

@@ -12,7 +12,7 @@
 "configs": {
 "bearer_token": {"default": None, "help": "[deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret"},
 "bearer_tokens": {"default": [], "help": " a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line",
-"cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))},
+"type": lambda val: set(val.split(",")),},
 "consumer_key": {"default": None, "help": "twitter API consumer_key"},
 "consumer_secret": {"default": None, "help": "twitter API consumer_secret"},
 "access_token": {"default": None, "help": "twitter API access_token"},

@@ -8,7 +8,7 @@ from loguru import logger
 from pytwitter import Api
 from slugify import slugify
 
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 from auto_archiver.core import Metadata,Media
 
 class TwitterApiExtractor(Extractor):

@@ -0,0 +1 @@
+from .vk_archiver import VkExtractor

@@ -2,7 +2,7 @@ from loguru import logger
 from vk_url_scraper import VkScraper
 
 from auto_archiver.utils.misc import dump_payload
-from auto_archiver.base_modules import Extractor
+from auto_archiver.base_processors import Extractor
 from auto_archiver.core import Metadata, Media, ArchivingContext
 
 

@@ -0,0 +1 @@
+from .wacz_enricher import WaczExtractorEnricher

@@ -6,7 +6,7 @@ from loguru import logger
 from warcio.archiveiterator import ArchiveIterator
 
 from auto_archiver.core import Media, Metadata, ArchivingContext
-from auto_archiver.base_modules import Extractor, Enricher
+from auto_archiver.base_processors import Extractor, Enricher
 from auto_archiver.utils import UrlUtil, random_str
 
 

@@ -0,0 +1 @@
+from .wayback_enricher import WaybackExtractorEnricher

@@ -2,7 +2,7 @@ import json
 from loguru import logger
 import time, requests
 
-from auto_archiver.base_modules import Extractor, Enricher
+from auto_archiver.base_processors import Extractor, Enricher
 from auto_archiver.utils import UrlUtil
 from auto_archiver.core import Metadata
 

@@ -0,0 +1 @@
+from .whisper_enricher import WhisperEnricher

@@ -2,9 +2,9 @@ import traceback
 import requests, time
 from loguru import logger
 
-from auto_archiver.base_modules import Enricher
+from auto_archiver.base_processors import Enricher
 from auto_archiver.core import Metadata, Media, ArchivingContext
-from auto_archiver.modules import S3Storage
+from auto_archiver.modules.s3_storage import S3Storage
 
 
 class WhisperEnricher(Enricher):

@@ -1,7 +1,7 @@
 import pytest
 
 from auto_archiver.core.metadata import Metadata
-from auto_archiver.base_modules.extractor import Extractor
+from auto_archiver.base_processors.extractor import Extractor
 class TestArchiverBase(object):
 
 archiver_class: str = None