Fix setting cli_feeder as default feeder on clean install

pull/189/head
Patrick Robertson 2025-02-10 13:06:24 +01:00
rodzic 1fad37fd93
commit e9dd321dcd
2 zmienionych plików z 15 dodań i 8 usunięć

Wyświetl plik

@ -36,6 +36,7 @@ steps:""" + "".join([f"\n {module}s: []" for module in BaseModule.MODULE_TYPES
# a dictionary of authentication information that can be used by extractors to log in to websites.
# you can use a comma-separated list for multiple domains on the same line (common use case: x.com,twitter.com)
# Common login 'types' are username/password, cookie, api key/token.
# There are two special keys for using cookies, they are: cookies_file and cookies_from_browser.
# Some Examples:
# facebook.com:
# username: "my_username"
@ -163,6 +164,6 @@ def read_yaml(yaml_filename: str) -> CommentedMap:
def store_yaml(config: CommentedMap, yaml_filename: str) -> None:
config_to_save = deepcopy(config)
config.pop('urls', None)
config_to_save.pop('urls', None)
with open(yaml_filename, "w", encoding="utf-8") as outf:
yaml.dump(config_to_save, outf)

Wyświetl plik

@ -128,6 +128,10 @@ class ArchivingOrchestrator:
elif basic_config.mode == 'simple':
simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
self.add_module_args(simple_modules, parser)
# for simple mode, we use the cli_feeder and any modules that don't require setup
yaml_config['steps']['feeders'] = ['cli_feeder']
# add them to the config
for module in simple_modules:
for module_type in module.type:
@ -237,18 +241,18 @@ class ArchivingOrchestrator:
if log_file := logging_config['file']:
logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
def install_modules(self):
def install_modules(self, modules_by_type):
"""
Swaps out the previous 'strings' in the config with the actual modules and loads them
Traverses all modules in 'steps' and loads them into the orchestrator, storing them in the
orchestrator's attributes (self.feeders, self.extractors etc.). If no modules of a certain type
are loaded, the program will exit with an error message.
"""
invalid_modules = []
for module_type in BaseModule.MODULE_TYPES:
step_items = []
modules_to_load = self.config['steps'][f"{module_type}s"]
modules_to_load = modules_by_type[f"{module_type}s"]
assert modules_to_load, f"No {module_type}s were configured. Make sure to set at least one {module_type} in your configuration file or on the command line (using --{module_type}s)"
def check_steps_ok():
@ -264,9 +268,11 @@ class ArchivingOrchestrator:
for module in modules_to_load:
if module == 'cli_feeder':
# pseudo module, don't load it
breakpoint()
urls = self.config['urls']
if not urls:
logger.error("No URLs provided. Please provide at least one URL to archive, or set up a feeder. Use --help for more information.")
logger.error("No URLs provided. Please provide at least one URL via the command line, or set up an alternative feeder. Use --help for more information.")
exit()
# cli_feeder is a pseudo module, it just takes the command line args
def feed(self) -> Generator[Metadata]:
@ -330,7 +336,7 @@ class ArchivingOrchestrator:
self.setup_complete_parser(basic_config, yaml_config, unused_args)
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
self.install_modules()
self.install_modules(self.config['steps'])
# log out the modules that were loaded
for module_type in BaseModule.MODULE_TYPES: