Separate setup() and module_setup().

pull/189/head
erinhmclark 2025-02-10 17:25:15 +00:00
rodzic c4bb667cec
commit 2c3d1f591f
14 zmienionych plików z 17 dodań i 26 usunięć

Wyświetl plik

@ -80,6 +80,10 @@ class BaseModule(ABC):
for key, val in config.get(self.name, {}).items(): for key, val in config.get(self.name, {}).items():
setattr(self, key, val) setattr(self, key, val)
def module_setup(self):
# For any additional setup required by modules, e.g. authentication
pass
def auth_for_site(self, site: str, extract_cookies=True) -> Mapping[str, Any]: def auth_for_site(self, site: str, extract_cookies=True) -> Mapping[str, Any]:
""" """
Returns the authentication information for a given site. This is used to authenticate Returns the authentication information for a given site. This is used to authenticate

Wyświetl plik

@ -242,6 +242,7 @@ class LazyBaseModule:
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default')) default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
config[self.name] = default_config | config.get(self.name, {}) config[self.name] = default_config | config.get(self.name, {})
instance.setup(config) instance.setup(config)
instance.module_setup()
return instance return instance
def __repr__(self): def __repr__(self):

Wyświetl plik

@ -19,9 +19,7 @@ from auto_archiver.core import Storage
class GDriveStorage(Storage): class GDriveStorage(Storage):
def setup(self, config: dict) -> None: def module_setup(self) -> None:
# Step 1: Call the BaseModule setup to dynamically assign configs
super().setup(config)
self.scopes = ['https://www.googleapis.com/auth/drive'] self.scopes = ['https://www.googleapis.com/auth/drive']
# Initialize Google Drive service # Initialize Google Drive service
self._setup_google_drive_service() self._setup_google_drive_service()

Wyświetl plik

@ -21,8 +21,7 @@ from . import GWorksheet
class GsheetsFeeder(Feeder): class GsheetsFeeder(Feeder):
def setup(self, config: dict): def module_setup(self) -> None:
super().setup(config)
self.gsheets_client = gspread.service_account(filename=self.service_account) self.gsheets_client = gspread.service_account(filename=self.service_account)
# TODO mv to validators # TODO mv to validators
assert self.sheet or self.sheet_id, ( assert self.sheet or self.sheet_id, (

Wyświetl plik

@ -17,9 +17,8 @@ class HtmlFormatter(Formatter):
environment: Environment = None environment: Environment = None
template: any = None template: any = None
def setup(self, config: dict) -> None: def module_setup(self) -> None:
"""Sets up the Jinja2 environment and loads the template.""" """Sets up the Jinja2 environment and loads the template."""
super().setup(config) # Ensure the base class logic is executed
template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/") template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")
self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True) self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True)

Wyświetl plik

@ -32,8 +32,7 @@ class InstagramAPIExtractor(Extractor):
r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?" r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?"
) )
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
if self.api_endpoint[-1] == "/": if self.api_endpoint[-1] == "/":
self.api_endpoint = self.api_endpoint[:-1] self.api_endpoint = self.api_endpoint[:-1]

Wyświetl plik

@ -25,8 +25,7 @@ class InstagramExtractor(Extractor):
profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url)) profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url))
# TODO: links to stories # TODO: links to stories
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.insta = instaloader.Instaloader( self.insta = instaloader.Instaloader(
download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}" download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"

Wyświetl plik

@ -27,12 +27,11 @@ class InstagramTbotExtractor(Extractor):
https://t.me/instagram_load_bot https://t.me/instagram_load_bot
""" """
def setup(self, configs) -> None: def module_setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup
2. checks if the session file is valid 2. checks if the session file is valid
""" """
super().setup(configs)
logger.info(f"SETUP {self.name} checking login...") logger.info(f"SETUP {self.name} checking login...")
self._prepare_session_file() self._prepare_session_file()
self._initialize_telegram_client() self._initialize_telegram_client()

Wyświetl plik

@ -13,8 +13,7 @@ NO_DUPLICATES_FOLDER = "no-dups/"
class S3Storage(Storage): class S3Storage(Storage):
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.s3 = boto3.client( self.s3 = boto3.client(
's3', 's3',
region_name=self.region, region_name=self.region,

Wyświetl plik

@ -18,14 +18,13 @@ class TelethonExtractor(Extractor):
invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)") invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)")
def setup(self, config: dict) -> None: def module_setup(self) -> None:
""" """
1. makes a copy of session_file that is removed in cleanup 1. makes a copy of session_file that is removed in cleanup
2. trigger login process for telegram or proceed if already saved in a session file 2. trigger login process for telegram or proceed if already saved in a session file
3. joins channel_invites where needed 3. joins channel_invites where needed
""" """
super().setup(config)
logger.info(f"SETUP {self.name} checking login...") logger.info(f"SETUP {self.name} checking login...")
# make a copy of the session that is used exclusively with this archiver instance # make a copy of the session that is used exclusively with this archiver instance

Wyświetl plik

@ -15,9 +15,7 @@ class TwitterApiExtractor(Extractor):
valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)") valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.api_index = 0 self.api_index = 0
self.apis = [] self.apis = []
if len(self.bearer_tokens): if len(self.bearer_tokens):

Wyświetl plik

@ -12,8 +12,7 @@ class VkExtractor(Extractor):
Currently only works for /wall posts Currently only works for /wall posts
""" """
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.vks = VkScraper(self.username, self.password, session_file=self.session_file) self.vks = VkScraper(self.username, self.password, session_file=self.session_file)
def download(self, item: Metadata) -> Metadata: def download(self, item: Metadata) -> Metadata:

Wyświetl plik

@ -18,8 +18,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
When used as an archiver it will extract the media from the .WACZ archive so it can be enriched. When used as an archiver it will extract the media from the .WACZ archive so it can be enriched.
""" """
def setup(self, configs) -> None: def module_setup(self) -> None:
super().setup(configs)
self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER') self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER') self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER')

Wyświetl plik

@ -13,8 +13,7 @@ class WhisperEnricher(Enricher):
Only works if an S3 compatible storage is used Only works if an S3 compatible storage is used
""" """
def setup(self, config: dict) -> None: def module_setup(self) -> None:
super().setup(config)
self.stores = self.config['steps']['storages'] self.stores = self.config['steps']['storages']
self.s3 = get_module("s3_storage", self.config) self.s3 = get_module("s3_storage", self.config)
if not "s3_storage" in self.stores: if not "s3_storage" in self.stores: