Separate setup() and module_setup().

pull/189/head
erinhmclark 2025-02-10 18:07:47 +00:00
rodzic 2c3d1f591f
commit e97ccf8a73
14 zmienionych plików z 18 dodań i 18 usunięć

Wyświetl plik

@ -14,7 +14,7 @@ class BaseModule(ABC):
Base module class. All modules should inherit from this class. Base module class. All modules should inherit from this class.
The exact methods a class implements will depend on the type of module it is, The exact methods a class implements will depend on the type of module it is,
however all modules have a .setup(config: dict) method to run any setup code however modules can have a .setup() method to run any setup code
(e.g. logging in to a site, spinning up a browser etc.) (e.g. logging in to a site, spinning up a browser etc.)
See BaseModule.MODULE_TYPES for the types of modules you can create, noting that See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
@ -60,7 +60,7 @@ class BaseModule(ABC):
def storages(self) -> list: def storages(self) -> list:
return self.config.get('storages', []) return self.config.get('storages', [])
def setup(self, config: dict): def config_setup(self, config: dict):
authentication = config.get('authentication', {}) authentication = config.get('authentication', {})
# extract out concatenated sites # extract out concatenated sites
@ -80,7 +80,7 @@ class BaseModule(ABC):
for key, val in config.get(self.name, {}).items(): for key, val in config.get(self.name, {}).items():
setattr(self, key, val) setattr(self, key, val)
def module_setup(self): def setup(self):
# For any additional setup required by modules, e.g. authentication # For any additional setup required by modules, e.g. authentication
pass pass

Wyświetl plik

@ -58,7 +58,7 @@ def get_module_lazy(module_name: str, suppress_warnings: bool = False) -> LazyBa
This has all the information about the module, but does not load the module itself or its dependencies This has all the information about the module, but does not load the module itself or its dependencies
To load an actual module, call .setup() on a laz module To load an actual module, call .setup() on a lazy module
""" """
if module_name in _LAZY_LOADED_MODULES: if module_name in _LAZY_LOADED_MODULES:
@ -241,8 +241,8 @@ class LazyBaseModule:
# merge the default config with the user config # merge the default config with the user config
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default')) default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
config[self.name] = default_config | config.get(self.name, {}) config[self.name] = default_config | config.get(self.name, {})
instance.setup(config) instance.config_setup(config)
instance.module_setup() instance.setup()
return instance return instance
def __repr__(self): def __repr__(self):

Wyświetl plik

@ -19,7 +19,7 @@ from auto_archiver.core import Storage
class GDriveStorage(Storage): class GDriveStorage(Storage):
def module_setup(self) -> None: def setup(self) -> None:
self.scopes = ['https://www.googleapis.com/auth/drive'] self.scopes = ['https://www.googleapis.com/auth/drive']
# Initialize Google Drive service # Initialize Google Drive service
self._setup_google_drive_service() self._setup_google_drive_service()

Wyświetl plik

@ -21,7 +21,7 @@ from . import GWorksheet
class GsheetsFeeder(Feeder): class GsheetsFeeder(Feeder):
def module_setup(self) -> None: def setup(self) -> None:
self.gsheets_client = gspread.service_account(filename=self.service_account) self.gsheets_client = gspread.service_account(filename=self.service_account)
# TODO mv to validators # TODO mv to validators
assert self.sheet or self.sheet_id, ( assert self.sheet or self.sheet_id, (

Wyświetl plik

@ -17,7 +17,7 @@ class HtmlFormatter(Formatter):
environment: Environment = None environment: Environment = None
template: any = None template: any = None
def module_setup(self) -> None: def setup(self) -> None:
"""Sets up the Jinja2 environment and loads the template.""" """Sets up the Jinja2 environment and loads the template."""
template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/") template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")
self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True) self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True)

Wyświetl plik

@ -32,7 +32,7 @@ class InstagramAPIExtractor(Extractor):
r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?" r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?"
) )
def module_setup(self) -> None: def setup(self) -> None:
if self.api_endpoint[-1] == "/": if self.api_endpoint[-1] == "/":
self.api_endpoint = self.api_endpoint[:-1] self.api_endpoint = self.api_endpoint[:-1]

Wyświetl plik

@ -25,7 +25,7 @@ class InstagramExtractor(Extractor):
profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url)) profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url))
# TODO: links to stories # TODO: links to stories
def module_setup(self) -> None: def setup(self) -> None:
self.insta = instaloader.Instaloader( self.insta = instaloader.Instaloader(
download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}" download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"

Wyświetl plik

@ -27,7 +27,7 @@ class InstagramTbotExtractor(Extractor):
https://t.me/instagram_load_bot https://t.me/instagram_load_bot
""" """
def module_setup(self) -> None: def setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup
2. checks if the session file is valid 2. checks if the session file is valid

Wyświetl plik

@ -13,7 +13,7 @@ NO_DUPLICATES_FOLDER = "no-dups/"
class S3Storage(Storage): class S3Storage(Storage):
def module_setup(self) -> None: def setup(self) -> None:
self.s3 = boto3.client( self.s3 = boto3.client(
's3', 's3',
region_name=self.region, region_name=self.region,

Wyświetl plik

@ -18,7 +18,7 @@ class TelethonExtractor(Extractor):
invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)") invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)")
def module_setup(self) -> None: def setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup

Wyświetl plik

@ -15,7 +15,7 @@ class TwitterApiExtractor(Extractor):
valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)") valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
def module_setup(self) -> None: def setup(self) -> None:
self.api_index = 0 self.api_index = 0
self.apis = [] self.apis = []
if len(self.bearer_tokens): if len(self.bearer_tokens):

Wyświetl plik

@ -12,7 +12,7 @@ class VkExtractor(Extractor):
Currently only works for /wall posts Currently only works for /wall posts
""" """
def module_setup(self) -> None: def setup(self) -> None:
self.vks = VkScraper(self.username, self.password, session_file=self.session_file) self.vks = VkScraper(self.username, self.password, session_file=self.session_file)
def download(self, item: Metadata) -> Metadata: def download(self, item: Metadata) -> Metadata:

Wyświetl plik

@ -18,7 +18,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
When used as an archiver it will extract the media from the .WACZ archive so it can be enriched. When used as an archiver it will extract the media from the .WACZ archive so it can be enriched.
""" """
def module_setup(self) -> None: def setup(self) -> None:
self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER') self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER') self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER')

Wyświetl plik

@ -13,7 +13,7 @@ class WhisperEnricher(Enricher):
Only works if an S3 compatible storage is used Only works if an S3 compatible storage is used
""" """
def module_setup(self) -> None: def setup(self) -> None:
self.stores = self.config['steps']['storages'] self.stores = self.config['steps']['storages']
self.s3 = get_module("s3_storage", self.config) self.s3 = get_module("s3_storage", self.config)
if not "s3_storage" in self.stores: if not "s3_storage" in self.stores: