Fix loading/saving to orchestration file with comments

pull/224/head
Patrick Robertson 2025-01-27 14:28:04 +01:00
rodzic f68e2726f2
commit e3074013d0
3 zmienionych plików z 22 dodań i 45 usunięć

Wyświetl plik

@ -13,47 +13,23 @@ from .module import MODULE_TYPES
from typing import Any, List, Type
# configurable_parents = [
# Feeder,
# Enricher,
# Extractor,
# Database,
# Storage,
# Formatter
# # Util
# ]
# feeder: Feeder
# formatter: Formatter
# extractors: List[Extractor] = field(default_factory=[])
# enrichers: List[Enricher] = field(default_factory=[])
# storages: List[Storage] = field(default_factory=[])
# databases: List[Database] = field(default_factory=[])
yaml = YAML()
# def __init__(self) -> None:
# self.defaults = {}
# self.cli_ops = {}
# self.config = {}
EMPTY_CONFIG = yaml.load("""
# Auto Archiver Configuration
# Steps are the modules that will be run in the order they are defined
# def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
# """
# if yaml_config_filename is provided, the --config argument is ignored,
# useful for library usage when the config values are preloaded
# overwrite_configs is a dict that overwrites the yaml file contents
# """
# # 1. parse CLI values
# if use_cli:
# parser = argparse.ArgumentParser(
# # prog = "auto-archiver",
# description="Auto Archiver is a CLI tool to archive media/metadata from online URLs; it can read URLs from many sources (Google Sheets, Command Line, ...); and write results to many destinations too (CSV, Google Sheets, MongoDB, ...)!",
# epilog="Check the code at https://github.com/bellingcat/auto-archiver"
# )
steps:""" + "".join([f"\n {module}s: []" for module in MODULE_TYPES]) + \
"""
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
# parser.add_argument('--version', action='version', version=__version__)
# Global configuration
# These are the global configurations that are used by the modules
logging:
level: INFO
""")
# note: 'logging' is explicitly added above in order to better format the config file
EMPTY_CONFIG = CommentedMap(**{
"steps": dict((f"{module_type}s", []) for module_type in MODULE_TYPES)
})
def to_dot_notation(yaml_conf: CommentedMap | dict) -> argparse.ArgumentParser:
dotdict = {}
@ -112,8 +88,6 @@ def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap:
return yaml_dict
yaml = YAML()
def read_yaml(yaml_filename: str) -> CommentedMap:
config = None
try:

Wyświetl plik

@ -100,7 +100,6 @@ class LazyBaseModule:
name: str
display_name: str
type: list
requires_setup: bool
description: str
path: str
@ -111,7 +110,7 @@ class LazyBaseModule:
# Constructor: stores the two arguments on the instance and does nothing else
# in the lines visible here.
def __init__(self, module_name, path):
# Kept as `name`; `path` is stored unchanged.
self.name = module_name
self.path = path
@property
def entry_point(self):
if not self._entry_point and not self.manifest['entry_point']:
@ -126,6 +125,10 @@ class LazyBaseModule:
# Read-only accessor for the 'configs' entry of this module's manifest.
@property
def configs(self):
# Goes through the `manifest` property on every access and indexes it directly.
return self.manifest['configs']
# Read-only accessor for the 'requires_setup' entry of this module's manifest.
@property
def requires_setup(self):
# Looked up via the `manifest` property on each access, so the value always
# comes from the manifest.
return self.manifest['requires_setup']
@property
def manifest(self):
@ -145,7 +148,6 @@ class LazyBaseModule:
self.display_name = manifest['name']
self.type = manifest['type']
self._entry_point = manifest['entry_point']
self.requires_setup = manifest['requires_setup']
self.description = manifest['description']
return manifest

Wyświetl plik

@ -168,13 +168,14 @@ class ArchivingOrchestrator:
# TODO: go through all the manifests and make sure we're not breaking anything with removing cli_set
# in most cases it'll mean replacing it with 'type': 'str' or 'type': 'int' or something
kwargs.pop('cli_set', None)
should_store = kwargs.pop('should_store', False)
kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}"
try:
kwargs['type'] = __builtins__.get(kwargs.get('type'), str)
except KeyError:
kwargs['type'] = getattr(validators, kwargs['type'])
group.add_argument(f"--{module.name}.{name}", **kwargs)
arg = group.add_argument(f"--{module.name}.{name}", **kwargs)
arg.should_store = should_store
def show_help(self):
# for the help message, we want to load *all* possible modules and show the help
@ -255,7 +256,7 @@ class ArchivingOrchestrator:
exit()
yaml_config = read_yaml(basic_config.config_file)
self.setup_complete_parser(basic_config, yaml_config, unused_args)