Separate setup() and module_setup().

pull/189/head
erinhmclark 2025-02-10 17:25:15 +00:00
rodzic c4bb667cec
commit 2c3d1f591f
14 zmienionych plików z 17 dodań i 26 usunięć

Wyświetl plik

@ -80,6 +80,10 @@ class BaseModule(ABC):
for key, val in config.get(self.name, {}).items():
setattr(self, key, val)
def module_setup(self):
# For any additional setup required by modules, e.g. authentication
pass
def auth_for_site(self, site: str, extract_cookies=True) -> Mapping[str, Any]:
"""
Returns the authentication information for a given site. This is used to authenticate

Wyświetl plik

@ -242,6 +242,7 @@ class LazyBaseModule:
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
config[self.name] = default_config | config.get(self.name, {})
instance.setup(config)
instance.module_setup()
return instance
def __repr__(self):

Wyświetl plik

@ -19,9 +19,7 @@ from auto_archiver.core import Storage
class GDriveStorage(Storage):
def setup(self, config: dict) -> None:
# Step 1: Call the BaseModule setup to dynamically assign configs
super().setup(config)
def module_setup(self) -> None:
self.scopes = ['https://www.googleapis.com/auth/drive']
# Initialize Google Drive service
self._setup_google_drive_service()

Wyświetl plik

@ -21,8 +21,7 @@ from . import GWorksheet
class GsheetsFeeder(Feeder):
def setup(self, config: dict):
super().setup(config)
def module_setup(self) -> None:
self.gsheets_client = gspread.service_account(filename=self.service_account)
# TODO mv to validators
assert self.sheet or self.sheet_id, (

Wyświetl plik

@ -17,9 +17,8 @@ class HtmlFormatter(Formatter):
environment: Environment = None
template: any = None
def setup(self, config: dict) -> None:
def module_setup(self) -> None:
"""Sets up the Jinja2 environment and loads the template."""
super().setup(config) # Ensure the base class logic is executed
template_dir = os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")
self.environment = Environment(loader=FileSystemLoader(template_dir), autoescape=True)

Wyświetl plik

@ -32,8 +32,7 @@ class InstagramAPIExtractor(Extractor):
r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?"
)
def setup(self, config: dict) -> None:
super().setup(config)
def module_setup(self) -> None:
if self.api_endpoint[-1] == "/":
self.api_endpoint = self.api_endpoint[:-1]

Wyświetl plik

@ -25,8 +25,7 @@ class InstagramExtractor(Extractor):
profile_pattern = re.compile(r"{valid_url}(\w+)".format(valid_url=valid_url))
# TODO: links to stories
def setup(self, config: dict) -> None:
super().setup(config)
def module_setup(self) -> None:
self.insta = instaloader.Instaloader(
download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"

Wyświetl plik

@ -27,12 +27,11 @@ class InstagramTbotExtractor(Extractor):
https://t.me/instagram_load_bot
"""
def setup(self, configs) -> None:
def module_setup(self) -> None:
"""
1. makes a copy of session_file that is removed in cleanup
2. checks if the session file is valid
"""
super().setup(configs)
logger.info(f"SETUP {self.name} checking login...")
self._prepare_session_file()
self._initialize_telegram_client()

Wyświetl plik

@ -13,8 +13,7 @@ NO_DUPLICATES_FOLDER = "no-dups/"
class S3Storage(Storage):
def setup(self, config: dict) -> None:
super().setup(config)
def module_setup(self) -> None:
self.s3 = boto3.client(
's3',
region_name=self.region,

Wyświetl plik

@ -18,14 +18,13 @@ class TelethonExtractor(Extractor):
invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)")
def setup(self, config: dict) -> None:
def module_setup(self) -> None:
"""
1. makes a copy of session_file that is removed in cleanup
2. trigger login process for telegram or proceed if already saved in a session file
3. joins channel_invites where needed
"""
super().setup(config)
logger.info(f"SETUP {self.name} checking login...")
# make a copy of the session that is used exclusively with this archiver instance

Wyświetl plik

@ -15,9 +15,7 @@ class TwitterApiExtractor(Extractor):
valid_url: re.Pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
def setup(self, config: dict) -> None:
super().setup(config)
def module_setup(self) -> None:
self.api_index = 0
self.apis = []
if len(self.bearer_tokens):

Wyświetl plik

@ -12,8 +12,7 @@ class VkExtractor(Extractor):
Currently only works for /wall posts
"""
def setup(self, config: dict) -> None:
super().setup(config)
def module_setup(self) -> None:
self.vks = VkScraper(self.username, self.password, session_file=self.session_file)
def download(self, item: Metadata) -> Metadata:

Wyświetl plik

@ -18,8 +18,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
When used as an archiver it will extract the media from the .WACZ archive so it can be enriched.
"""
def setup(self, configs) -> None:
super().setup(configs)
def module_setup(self) -> None:
self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER')

Wyświetl plik

@ -13,8 +13,7 @@ class WhisperEnricher(Enricher):
Only works if an S3 compatible storage is used
"""
def setup(self, config: dict) -> None:
super().setup(config)
def module_setup(self) -> None:
self.stores = self.config['steps']['storages']
self.s3 = get_module("s3_storage", self.config)
if not "s3_storage" in self.stores: