"""YAML-backed configuration helpers for the archiver.

Provides reading/writing of the YAML configuration file and two helpers that
convert between the nested YAML layout and the flat "dot notation" layout
(``{"a.b": 1}``) used for argparse defaults.

Provenance: reconstructed from the ``src/auto_archiver/core/config.py`` hunks of
patch b6b0858 ("Switch back to using yaml with dot notation"). The other hunks
of that patch (``loader.py``, ``orchestrator.py``) are not reproduced here.
"""

import argparse
import copy
from collections import OrderedDict
from dataclasses import dataclass, field

import yaml

from .loader import MODULE_TYPES

# Skeleton configuration: one empty list per step type, keyed by the plural
# module type name (e.g. "feeders").
EMPTY_CONFIG = {
    "steps": {f"{module_type}s": [] for module_type in MODULE_TYPES},
}


class LoadFromFile(argparse.Action):
    """argparse action that reads additional command-line arguments from a file."""

    def __call__(self, parser, namespace, values, option_string=None):
        # `values` is used as a context manager exposing .read(), so it is
        # presumably an already-open file object (e.g. from argparse.FileType)
        # -- TODO confirm at the add_argument() call site.
        with values as f:
            # parse arguments in the file and store them in the target namespace
            parser.parse_args(f.read().split(), namespace)


def to_dot_notation(yaml_conf: dict) -> dict:
    """Flatten a nested YAML config into a single-level dot-notation dict.

    Non-empty entries under ``steps`` are hoisted to top-level keys using the
    step name as-is (``steps.feeders`` -> ``feeders``). Every other nested
    mapping is flattened with ``.`` as separator
    (``{"a": {"b": 1}}`` -> ``{"a.b": 1}``).

    The input mapping is NOT modified.

    Args:
        yaml_conf: nested configuration, as loaded from YAML.

    Returns:
        A new flat dict suitable for ``parser.set_defaults(**result)``.
    """
    dotdict = {}

    # Step keys are already plural (see EMPTY_CONFIG), so they are used
    # unchanged. `or {}` tolerates a `steps:` key with no value in the YAML.
    for step, modules in (yaml_conf.get("steps") or {}).items():
        if modules:
            dotdict[step] = modules

    def flatten(subdict, prefix=""):
        for key, value in subdict.items():
            if isinstance(value, dict):
                flatten(value, f"{prefix}{key}.")
            else:
                dotdict[f"{prefix}{key}"] = value

    # 'steps' was handled above; skip it here instead of popping it from the
    # caller's dict.
    flatten({key: value for key, value in yaml_conf.items() if key != "steps"})
    return dotdict


def merge_dicts(dotdict: dict, yaml_dict: dict) -> dict:
    """Merge dot-notation entries into a nested config dict.

    Each key of ``dotdict`` is split on ``.``; intermediate mappings are
    created on demand and the final component is assigned the value
    (``{"a.b": 1}`` sets ``yaml_dict["a"]["b"] = 1``). Keys without a dot are
    assigned at the top level.

    Args:
        dotdict: flat mapping with (possibly dotted) string keys.
        yaml_dict: nested config to update; it is modified in place.

    Returns:
        ``yaml_dict`` itself, after the merge.
    """
    for dotted_key, value in dotdict.items():
        *parents, leaf = dotted_key.split(".")
        target = yaml_dict
        for part in parents:
            target = target.setdefault(part, {})
        target[leaf] = value
    return yaml_dict


def read_yaml(yaml_filename: str) -> dict:
    """Load the YAML configuration file.

    Args:
        yaml_filename: path of the YAML file to read.

    Returns:
        The parsed configuration. If the file does not exist or is empty, a
        fresh copy of ``EMPTY_CONFIG`` is returned, so callers can mutate the
        result without altering the module-level constant.
    """
    try:
        with open(yaml_filename, "r", encoding="utf-8") as inf:
            config = yaml.safe_load(inf)
    except FileNotFoundError:
        config = None

    # yaml.safe_load returns None for an empty document.
    if config is None:
        config = copy.deepcopy(EMPTY_CONFIG)
    return config


def store_yaml(config: dict, yaml_filename: str) -> None:
    """Write ``config`` to ``yaml_filename`` as block-style YAML (UTF-8)."""
    with open(yaml_filename, "w", encoding="utf-8") as outf:
        yaml.dump(config, outf, default_flow_style=False)
Union, List from urllib.parse import urlparse from ipaddress import ip_address import argparse -import configparser import os from os.path import join, dirname @@ -25,7 +24,7 @@ from ..enrichers import Enricher from ..databases import Database from .metadata import Metadata from ..version import __version__ -from .config import read_config, store_config +from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG from .loader import available_modules, Module, MODULE_TYPES import tempfile, traceback @@ -69,24 +68,23 @@ class ArchivingOrchestrator: parser.add_argument('-s', '--store', action='store_true', dest='store', help='Store the created config in the config file') self.basic_parser = parser - def setup_complete_parser(self, basic_config: dict, ini_config: dict, unused_args: list[str]) -> None: + def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None: parser = argparse.ArgumentParser( parents = [self.basic_parser], add_help=False, ) - + self.add_steps_args(parser) + breakpoint() # check what mode we're in # if we have a config file, use that to decide which modules to load # if simple, we'll load just the modules that has requires_setup = False # if full, we'll load all modules - if ini_config: + if yaml_config != EMPTY_CONFIG: # only load the modules enabled in config + # TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty? 
enabled_modules = [] for module_type in MODULE_TYPES: - try: - enabled_modules.extend(ini_config.get("STEPS", module_type)) - except configparser.NoOptionError: - pass + enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", [])) # add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter' for module_type in MODULE_TYPES: @@ -100,23 +98,25 @@ class ArchivingOrchestrator: # add them to the config for module in simple_modules: for module_type in module.type: - existing_modules = config['STEPS'] = module.name - ini_config.setdefault(f"{module_type}s", []).append(module.name) - + yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name) else: # load all modules, they're not using the 'simple' mode self.add_module_args(available_modules(with_manifest=True), parser) - - parser.set_defaults(**ini_config) + + breakpoint() + parser.set_defaults(**to_dot_notation(yaml_config)) # reload the parser with the new arguments, now that we have them - self.config, unknown = parser.parse_known_args(unused_args) + parsed, unknown = parser.parse_known_args(unused_args) if unknown: - logger.warning(f"Ignoring unknown/unused arguments: {unknown}") + logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?") + + # merge the new config with the old one + yaml_config = merge_dicts(vars(parsed), yaml_config) if self.config and basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)): logger.info(f"Storing configuration file to {basic_config.config_file}") - store_config(ini_config, basic_config.config_file) + store_yaml(yaml_config, basic_config.config_file) breakpoint() logger.info(f"FEEDER: {self.config.feeders}") logger.info(f"ENRICHERS: {self.config.enrichers}") @@ -179,16 +179,16 @@ class ArchivingOrchestrator: self.show_help() # load the config file - ini_config = {} + yaml_config = {} - 
try: - ini_config = read_config(basic_config.config_file) - except FileNotFoundError: - if basic_config.config_file != DEFAULT_CONFIG_FILE: - logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.") - exit() + if not os.path.exists(basic_config.config_file) and basic_config.config_file != DEFAULT_CONFIG_FILE: + logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.") + exit() - self.setup_complete_parser(basic_config, ini_config, unused_args) + yaml_config = read_yaml(basic_config.config_file) + + + self.setup_complete_parser(basic_config, yaml_config, unused_args) config.parse()