Separate setup() and module_setup(): rename setup(config) to config_setup(config) and module_setup() to setup().

2025-02-10 18:07:47 +00:00 · 2025-02-10 18:07:47 +00:00 · e97ccf8a73
commit e97ccf8a73
--- a/src/auto_archiver/core/base_module.py
+++ b/src/auto_archiver/core/base_module.py
@ -14,7 +14,7 @@ class BaseModule(ABC):
    Base module class. All modules should inherit from this class.

    The exact methods a class implements will depend on the type of module it is,
-    however all modules have a .setup(config: dict) method to run any setup code
+    however modules can have a .setup() method to run any setup code
    (e.g. logging in to a site, spinning up a browser etc.)

    See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
@ -60,7 +60,7 @@ class BaseModule(ABC):
    def storages(self) -> list:
        return self.config.get('storages', [])

-    def setup(self, config: dict):
+    def config_setup(self, config: dict):

        authentication = config.get('authentication', {})
        # extract out concatenated sites
@ -80,7 +80,7 @@ class BaseModule(ABC):
        for key, val in config.get(self.name, {}).items():
            setattr(self, key, val)

-    def module_setup(self):
+    def setup(self):
        # For any additional setup required by modules, e.g. authentication
        pass

--- a/src/auto_archiver/core/module.py
+++ b/src/auto_archiver/core/module.py
@ -58,7 +58,7 @@ def get_module_lazy(module_name: str, suppress_warnings: bool = False) -> LazyBa
    
    This has all the information about the module, but does not load the module itself or its dependencies
    
-    To load an actual module, call .setup() on a laz module
+    To load an actual module, call .setup() on a lazy module
    
    """
    if module_name in _LAZY_LOADED_MODULES:
@ -241,8 +241,8 @@ class LazyBaseModule:
        # merge the default config with the user config
        default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
        config[self.name] = default_config  | config.get(self.name, {})
-        instance.setup(config)
-        instance.module_setup()
+        instance.config_setup(config)
+        instance.setup()
        return instance

    def __repr__(self):
--- a/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
+++ b/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
@ -19,7 +19,7 @@ from auto_archiver.core import Storage

 class GDriveStorage(Storage):

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        self.scopes = ['https://www.googleapis.com/auth/drive']
        # Initialize Google Drive service
        self._setup_google_drive_service()
--- a/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
+++ b/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
@ -21,7 +21,7 @@ from . import GWorksheet

 class GsheetsFeeder(Feeder):

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        self.gsheets_client = gspread.service_account(filename=self.service_account)
        # TODO mv to validators
        assert self.sheet or self.sheet_id, (
--- a/src/auto_archiver/modules/html_formatter/html_formatter.py
+++ b/src/auto_archiver/modules/html_formatter/html_formatter.py
@ -17,7 +17,7 @@ class HtmlFormatter(Formatter):
    environment: Environment = None
    template: any = None

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        """Sets up the Jinja2 environment and loads the template."""
        template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")
        self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
--- a/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
+++ b/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
@ -32,7 +32,7 @@ class InstagramAPIExtractor(Extractor):
        r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?"
    )

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        if self.api_endpoint[-1] == "/":
            self.api_endpoint = self.api_endpoint[:-1]

--- a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
+++ b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
@ -25,7 +25,7 @@ class InstagramExtractor(Extractor):
    profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url))
    # TODO: links to stories

-    def module_setup(self) -> None:
+    def setup(self) -> None:

        self.insta = instaloader.Instaloader(
            download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"
--- a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
+++ b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
@ -27,7 +27,7 @@ class InstagramTbotExtractor(Extractor):
    https://t.me/instagram_load_bot
    """

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        """
        1. makes a copy of session_file that is removed in cleanup
        2. checks if the session file is valid
--- a/src/auto_archiver/modules/s3_storage/s3_storage.py
+++ b/src/auto_archiver/modules/s3_storage/s3_storage.py
@ -13,7 +13,7 @@ NO_DUPLICATES_FOLDER = "no-dups/"

 class S3Storage(Storage):

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        self.s3 = boto3.client(
            's3',
            region_name=self.region,
--- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
+++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
@ -18,7 +18,7 @@ class TelethonExtractor(Extractor):
    invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)")


-    def module_setup(self) -> None:
+    def setup(self) -> None:

        """
        1. makes a copy of session_file that is removed in cleanup
--- a/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
+++ b/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
@ -15,7 +15,7 @@ class TwitterApiExtractor(Extractor):

    valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        self.api_index = 0
        self.apis = []
        if len(self.bearer_tokens):
--- a/src/auto_archiver/modules/vk_extractor/vk_extractor.py
+++ b/src/auto_archiver/modules/vk_extractor/vk_extractor.py
@ -12,7 +12,7 @@ class VkExtractor(Extractor):
    Currently only works for /wall posts
    """

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        self.vks = VkScraper(self.username, self.password, session_file=self.session_file)

    def download(self, item: Metadata) -> Metadata:
--- a/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py
+++ b/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py
@ -18,7 +18,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
    When used as an archiver it will extract the media from the .WACZ archive so it can be enriched.
    """

-    def module_setup(self) -> None:
+    def setup(self) -> None:

        self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
        self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER')
--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@ -13,7 +13,7 @@ class WhisperEnricher(Enricher):
    Only works if an S3 compatible storage is used
    """

-    def module_setup(self) -> None:
+    def setup(self) -> None:
        self.stores = self.config['steps']['storages']
        self.s3 = get_module("s3_storage", self.config)
        if not "s3_storage" in self.stores: