mirror of https://github.com/bellingcat/auto-archiver
Fix setting cli_feeder as default feeder on clean install
parent
1fad37fd93
commit
e9dd321dcd
|
@ -36,6 +36,7 @@ steps:""" + "".join([f"\n {module}s: []" for module in BaseModule.MODULE_TYPES
|
|||
# a dictionary of authentication information that can be used by extractors to login to website.
|
||||
# you can use a comma separated list for multiple domains on the same line (common usecase: x.com,twitter.com)
|
||||
# Common login 'types' are username/password, cookie, api key/token.
|
||||
# There are two special keys for using cookies, they are: cookies_file and cookies_from_browser.
|
||||
# Some Examples:
|
||||
# facebook.com:
|
||||
# username: "my_username"
|
||||
|
@ -163,6 +164,6 @@ def read_yaml(yaml_filename: str) -> CommentedMap:
|
|||
def store_yaml(config: CommentedMap, yaml_filename: str) -> None:
    """Write *config* to *yaml_filename*, leaving out the 'urls' entry.

    The configuration is deep-copied first and 'urls' is removed from the
    copy only, so the mapping passed in by the caller is never modified.

    Args:
        config: The configuration mapping to persist.
        yaml_filename: Path of the file to write (opened in "w" mode, UTF-8).
    """
    config_to_save = deepcopy(config)

    # Pop from the copy, not from `config`: the caller may still need
    # config['urls'] after the file has been written.
    config_to_save.pop('urls', None)
    with open(yaml_filename, "w", encoding="utf-8") as outf:
        yaml.dump(config_to_save, outf)
|
|
@ -128,6 +128,10 @@ class ArchivingOrchestrator:
|
|||
elif basic_config.mode == 'simple':
|
||||
simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
|
||||
self.add_module_args(simple_modules, parser)
|
||||
|
||||
# for simple mode, we use the cli_feeder and any modules that don't require setup
|
||||
yaml_config['steps']['feeders'] = ['cli_feeder']
|
||||
|
||||
# add them to the config
|
||||
for module in simple_modules:
|
||||
for module_type in module.type:
|
||||
|
@ -237,18 +241,18 @@ class ArchivingOrchestrator:
|
|||
if log_file := logging_config['file']:
|
||||
logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
|
||||
|
||||
|
||||
def install_modules(self):
|
||||
def install_modules(self, modules_by_type):
|
||||
"""
|
||||
Swaps out the previous 'strings' in the config with the actual modules and loads them
|
||||
Traverses all modules in 'steps' and loads them into the orchestrator, storing them in the
|
||||
orchestrator's attributes (self.feeders, self.extractors etc.). If no modules of a certain type
|
||||
are loaded, the program will exit with an error message.
|
||||
"""
|
||||
|
||||
invalid_modules = []
|
||||
for module_type in BaseModule.MODULE_TYPES:
|
||||
|
||||
step_items = []
|
||||
modules_to_load = self.config['steps'][f"{module_type}s"]
|
||||
|
||||
modules_to_load = modules_by_type[f"{module_type}s"]
|
||||
assert modules_to_load, f"No {module_type}s were configured. Make sure to set at least one {module_type} in your configuration file or on the command line (using --{module_type}s)"
|
||||
|
||||
def check_steps_ok():
|
||||
|
@ -264,9 +268,11 @@ class ArchivingOrchestrator:
|
|||
|
||||
for module in modules_to_load:
|
||||
if module == 'cli_feeder':
|
||||
# pseudo module, don't load it
|
||||
breakpoint()
|
||||
urls = self.config['urls']
|
||||
if not urls:
|
||||
logger.error("No URLs provided. Please provide at least one URL to archive, or set up a feeder. Use --help for more information.")
|
||||
logger.error("No URLs provided. Please provide at least one URL via the command line, or set up an alternative feeder. Use --help for more information.")
|
||||
exit()
|
||||
# cli_feeder is a pseudo module, it just takes the command line args
|
||||
def feed(self) -> Generator[Metadata]:
|
||||
|
@ -330,7 +336,7 @@ class ArchivingOrchestrator:
|
|||
self.setup_complete_parser(basic_config, yaml_config, unused_args)
|
||||
|
||||
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
||||
self.install_modules()
|
||||
self.install_modules(self.config['steps'])
|
||||
|
||||
# log out the modules that were loaded
|
||||
for module_type in BaseModule.MODULE_TYPES:
|
||||
|
|
Loading…
Reference in New Issue