From e3074013d01cae74f722732124d188871a43f7fc Mon Sep 17 00:00:00 2001 From: Patrick Robertson Date: Mon, 27 Jan 2025 14:28:04 +0100 Subject: [PATCH] Fix loading/saving to orchestration file with comments --- src/auto_archiver/core/config.py | 52 +++++++------------------- src/auto_archiver/core/module.py | 8 ++-- src/auto_archiver/core/orchestrator.py | 7 ++-- 3 files changed, 22 insertions(+), 45 deletions(-) diff --git a/src/auto_archiver/core/config.py b/src/auto_archiver/core/config.py index 81a1c10..f724828 100644 --- a/src/auto_archiver/core/config.py +++ b/src/auto_archiver/core/config.py @@ -13,47 +13,23 @@ from .module import MODULE_TYPES from typing import Any, List, Type -# configurable_parents = [ -# Feeder, -# Enricher, -# Extractor, -# Database, -# Storage, -# Formatter -# # Util -# ] -# feeder: Feeder -# formatter: Formatter -# extractors: List[Extractor] = field(default_factory=[]) -# enrichers: List[Enricher] = field(default_factory=[]) -# storages: List[Storage] = field(default_factory=[]) -# databases: List[Database] = field(default_factory=[]) +yaml = YAML() -# def __init__(self) -> None: -# self.defaults = {} -# self.cli_ops = {} -# self.config = {} +EMPTY_CONFIG = yaml.load(""" +# Auto Archiver Configuration +# Steps are the modules that will be run in the order they are defined - # def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}): - # """ - # if yaml_config_filename is provided, the --config argument is ignored, - # useful for library usage when the config values are preloaded - # overwrite_configs is a dict that overwrites the yaml file contents - # """ - # # 1. parse CLI values - # if use_cli: - # parser = argparse.ArgumentParser( - # # prog = "auto-archiver", - # description="Auto Archiver is a CLI tool to archive media/metadata from online URLs; it can read URLs from many sources (Google Sheets, Command Line, ...); and write results to many destinations too (CSV, Google Sheets, MongoDB, ...)!", - # epilog="Check the code at https://github.com/bellingcat/auto-archiver" - # ) +steps:""" + "".join([f"\n {module}s: []" for module in MODULE_TYPES]) + \ +""" - # parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml') - # parser.add_argument('--version', action='version', version=__version__) +# Global configuration +# These are the global configurations that are used by the modules + +logging: + level: INFO +""") +# note: 'logging' is explicitly added above in order to better format the config file -EMPTY_CONFIG = CommentedMap(**{ - "steps": dict((f"{module_type}s", []) for module_type in MODULE_TYPES) -}) def to_dot_notation(yaml_conf: CommentedMap | dict) -> argparse.ArgumentParser: dotdict = {} @@ -112,8 +88,6 @@ def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap: return yaml_dict -yaml = YAML() - def read_yaml(yaml_filename: str) -> CommentedMap: config = None try: diff --git a/src/auto_archiver/core/module.py b/src/auto_archiver/core/module.py index 96a8e5e..29f9769 100644 --- a/src/auto_archiver/core/module.py +++ b/src/auto_archiver/core/module.py @@ -100,7 +100,6 @@ class LazyBaseModule: name: str display_name: str type: list - requires_setup: bool description: str path: str @@ -111,7 +110,7 @@ class LazyBaseModule: def __init__(self, module_name, path): self.name = module_name self.path = path - + @property def entry_point(self): if not self._entry_point and not self.manifest['entry_point']: @@ -126,6 +125,10 @@ class LazyBaseModule: @property def configs(self): return self.manifest['configs'] + + @property + def requires_setup(self): + return self.manifest['requires_setup'] @property def manifest(self): @@ -145,7 +148,6 @@ class LazyBaseModule: self.display_name = manifest['name'] self.type = manifest['type'] self._entry_point = manifest['entry_point'] - self.requires_setup = manifest['requires_setup'] self.description = manifest['description'] return manifest diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index 2a5cf4a..967f652 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -168,13 +168,14 @@ class ArchivingOrchestrator: # TODO: go through all the manifests and make sure we're not breaking anything with removing cli_set # in most cases it'll mean replacing it with 'type': 'str' or 'type': 'int' or something kwargs.pop('cli_set', None) - + should_store = kwargs.pop('should_store', False) kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}" try: kwargs['type'] = __builtins__.get(kwargs.get('type'), str) except KeyError: kwargs['type'] = getattr(validators, kwargs['type']) - group.add_argument(f"--{module.name}.{name}", **kwargs) + arg = group.add_argument(f"--{module.name}.{name}", **kwargs) + arg.should_store = should_store def show_help(self): # for the help message, we want to load *all* possible modules and show the help @@ -255,7 +256,7 @@ class ArchivingOrchestrator: exit() yaml_config = read_yaml(basic_config.config_file) - + self.setup_complete_parser(basic_config, yaml_config, unused_args)