Switch back to using yaml with dot notation

(two simple helper functions to convert between dot and dict notation)
pull/183/head
Patrick Robertson 2025-01-22 17:40:51 +01:00
rodzic 54995ad6ab
commit b6b085854c
3 zmienionych plików z 77 dodań i 39 usunięć

Wyświetl plik

@ -4,10 +4,13 @@ It supports CLI argument parsing, loading from YAML file, and overrides to allow
flexible setup in various environments.
"""
import argparse
from configparser import ConfigParser
from dataclasses import dataclass, field
import argparse
import yaml
from dataclasses import dataclass, field
from collections import OrderedDict
from .loader import MODULE_TYPES
# configurable_parents = [
# Feeder,
@ -47,21 +50,56 @@ from dataclasses import dataclass, field
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
# parser.add_argument('--version', action='version', version=__version__)
EMPTY_CONFIG = {
"steps": dict((f"{module_type}s", []) for module_type in MODULE_TYPES)
}
class LoadFromFile (argparse.Action):
def __call__ (self, parser, namespace, values, option_string = None):
with values as f:
# parse arguments in the file and store them in the target namespace
parser.parse_args(f.read().split(), namespace)
def read_config(config_filename: str) -> dict:
config = ConfigParser()
config.read(config_filename)
# setup basic format
if 'STEPS' not in config.sections():
config.add_section("STEPS")
def to_dot_notation(yaml_conf: str) -> argparse.ArgumentParser:
dotdict = {}
for step, vals in yaml_conf.pop('steps', {}).items():
if vals:
dotdict[f"{step}s"] = vals
def process_subdict(subdict, prefix=""):
for key, value in subdict.items():
if type(value) == dict:
process_subdict(value, f"{prefix}{key}.")
else:
dotdict[f"{prefix}{key}"] = value
process_subdict(yaml_conf)
return dotdict
def merge_dicts(dotdict, yaml_dict):
def process_subdict(subdict, prefix=""):
for key, value in subdict.items():
if "." in key:
keys = key.split(".")
subdict = yaml_dict
for k in keys[:-1]:
subdict = subdict.setdefault(k, {})
subdict[keys[-1]] = value
else:
yaml_dict[key] = value
process_subdict(dotdict)
return yaml_dict
def read_yaml(yaml_filename: str) -> dict:
try:
with open(yaml_filename, "r", encoding="utf-8") as inf:
config = yaml.safe_load(inf)
except FileNotFoundError:
config = EMPTY_CONFIG
return config
def store_config(config: ConfigParser, config_filename: str):
with open(config_filename, "w", encoding="utf-8") as outf:
config.write(outf)
def store_yaml(config: dict, yaml_filename: str):
with open(yaml_filename, "w", encoding="utf-8") as outf:
yaml.dump(config, outf, default_flow_style=False)

Wyświetl plik

@ -62,7 +62,7 @@ class Module:
def load_manifest(module_path):
print(f"Loading manifest for module {module_path}")
# print(f"Loading manifest for module {module_path}")
# load the manifest file
manifest = copy.deepcopy(_DEFAULT_MANIFEST)

Wyświetl plik

@ -9,7 +9,6 @@ from typing import Generator, Union, List
from urllib.parse import urlparse
from ipaddress import ip_address
import argparse
import configparser
import os
from os.path import join, dirname
@ -25,7 +24,7 @@ from ..enrichers import Enricher
from ..databases import Database
from .metadata import Metadata
from ..version import __version__
from .config import read_config, store_config
from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG
from .loader import available_modules, Module, MODULE_TYPES
import tempfile, traceback
@ -69,24 +68,23 @@ class ArchivingOrchestrator:
parser.add_argument('-s', '--store', action='store_true', dest='store', help='Store the created config in the config file')
self.basic_parser = parser
def setup_complete_parser(self, basic_config: dict, ini_config: dict, unused_args: list[str]) -> None:
def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
parser = argparse.ArgumentParser(
parents = [self.basic_parser],
add_help=False,
)
self.add_steps_args(parser)
breakpoint()
# check what mode we're in
# if we have a config file, use that to decide which modules to load
# if simple, we'll load just the modules that has requires_setup = False
# if full, we'll load all modules
if ini_config:
if yaml_config != EMPTY_CONFIG:
# only load the modules enabled in config
# TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
enabled_modules = []
for module_type in MODULE_TYPES:
try:
enabled_modules.extend(ini_config.get("STEPS", module_type))
except configparser.NoOptionError:
pass
enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
# add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter'
for module_type in MODULE_TYPES:
@ -100,23 +98,25 @@ class ArchivingOrchestrator:
# add them to the config
for module in simple_modules:
for module_type in module.type:
existing_modules = config['STEPS'] = module.name
ini_config.setdefault(f"{module_type}s", []).append(module.name)
yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
else:
# load all modules, they're not using the 'simple' mode
self.add_module_args(available_modules(with_manifest=True), parser)
parser.set_defaults(**ini_config)
breakpoint()
parser.set_defaults(**to_dot_notation(yaml_config))
# reload the parser with the new arguments, now that we have them
self.config, unknown = parser.parse_known_args(unused_args)
parsed, unknown = parser.parse_known_args(unused_args)
if unknown:
logger.warning(f"Ignoring unknown/unused arguments: {unknown}")
logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?")
# merge the new config with the old one
yaml_config = merge_dicts(vars(parsed), yaml_config)
if self.config and basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)):
logger.info(f"Storing configuration file to {basic_config.config_file}")
store_config(ini_config, basic_config.config_file)
store_yaml(yaml_config, basic_config.config_file)
breakpoint()
logger.info(f"FEEDER: {self.config.feeders}")
logger.info(f"ENRICHERS: {self.config.enrichers}")
@ -179,16 +179,16 @@ class ArchivingOrchestrator:
self.show_help()
# load the config file
ini_config = {}
yaml_config = {}
try:
ini_config = read_config(basic_config.config_file)
except FileNotFoundError:
if basic_config.config_file != DEFAULT_CONFIG_FILE:
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
exit()
if not os.path.exists(basic_config.config_file) and basic_config.config_file != DEFAULT_CONFIG_FILE:
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
exit()
self.setup_complete_parser(basic_config, ini_config, unused_args)
yaml_config = read_yaml(basic_config.config_file)
self.setup_complete_parser(basic_config, yaml_config, unused_args)
config.parse()