Switch back to using yaml with dot notation

(two simple helper functions to convert between dot and dict notation)
pull/183/head
Patrick Robertson 2025-01-22 17:40:51 +01:00
rodzic 54995ad6ab
commit b6b085854c
3 zmienionych plików z 77 dodań i 39 usunięć

Wyświetl plik

@ -4,10 +4,13 @@ It supports CLI argument parsing, loading from YAML file, and overrides to allow
flexible setup in various environments. flexible setup in various environments.
""" """
import argparse
from configparser import ConfigParser
from dataclasses import dataclass, field
import argparse
import yaml
from dataclasses import dataclass, field
from collections import OrderedDict
from .loader import MODULE_TYPES
# configurable_parents = [ # configurable_parents = [
# Feeder, # Feeder,
@ -47,21 +50,56 @@ from dataclasses import dataclass, field
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml') # parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
# parser.add_argument('--version', action='version', version=__version__) # parser.add_argument('--version', action='version', version=__version__)
# Skeleton configuration: a single "steps" section in which every module type
# (pluralised, e.g. "<type>s") maps to an empty list of enabled modules.
EMPTY_CONFIG = {
    "steps": {f"{module_type}s": [] for module_type in MODULE_TYPES},
}
class LoadFromFile(argparse.Action):
    """argparse action that reads additional arguments from a file.

    The option's value is expected to be an open, readable file object
    (anything usable as a context manager with a ``read()`` method). Its
    contents are split on whitespace and parsed by the same parser, with the
    results stored in the namespace that is currently being populated.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        with values as arg_file:
            # Every whitespace-separated token in the file is one CLI token.
            file_tokens = arg_file.read().split()
            parser.parse_args(file_tokens, namespace)
def read_config(config_filename: str) -> dict: def to_dot_notation(yaml_conf: str) -> argparse.ArgumentParser:
config = ConfigParser() dotdict = {}
config.read(config_filename) for step, vals in yaml_conf.pop('steps', {}).items():
# setup basic format if vals:
if 'STEPS' not in config.sections(): dotdict[f"{step}s"] = vals
config.add_section("STEPS")
def process_subdict(subdict, prefix=""):
for key, value in subdict.items():
if type(value) == dict:
process_subdict(value, f"{prefix}{key}.")
else:
dotdict[f"{prefix}{key}"] = value
process_subdict(yaml_conf)
return dotdict
def merge_dicts(dotdict, yaml_dict):
    """Merge dot-notation settings into a nested configuration dict, in place.

    Each key of *dotdict* is split on ``"."``; every component except the last
    names a nested dict inside *yaml_dict* (created when missing) and the last
    component is the key that receives the value. Keys without a dot are
    written directly at the top level of *yaml_dict*.

    Args:
        dotdict: flat mapping such as ``{"module.option": value}``.
        yaml_dict: nested mapping to update; it is modified in place.

    Returns:
        The same *yaml_dict* object, after the update.

    Raises:
        TypeError: if an intermediate component of a dotted key already holds
            a value that is neither a dict nor ``None``.
    """
    for dotted_key, value in dotdict.items():
        *parents, leaf = dotted_key.split(".")
        target = yaml_dict
        for part in parents:
            child = target.get(part)
            if child is None:
                # Missing, or an empty YAML section ("module:" parses to
                # None): start a fresh nested dict instead of crashing.
                child = target[part] = {}
            elif not isinstance(child, dict):
                raise TypeError(
                    f"Cannot set '{dotted_key}': '{part}' is already set to "
                    f"a non-mapping value ({child!r})"
                )
            target = child
        target[leaf] = value
    return yaml_dict
def read_yaml(yaml_filename: str) -> dict:
    """Load the YAML configuration stored in *yaml_filename*.

    Args:
        yaml_filename: path of the YAML file to read (decoded as UTF-8).

    Returns:
        The parsed configuration. When the file does not exist, or contains
        no YAML document, an independent copy of ``EMPTY_CONFIG`` is returned.
    """
    # Local import so this function does not rely on a file-level import.
    import copy

    try:
        with open(yaml_filename, "r", encoding="utf-8") as inf:
            config = yaml.safe_load(inf)
    except FileNotFoundError:
        config = None

    if config is None:
        # safe_load yields None for an empty file. Hand out a deep copy so
        # callers that mutate the result never alter the shared EMPTY_CONFIG.
        config = copy.deepcopy(EMPTY_CONFIG)
    return config
def store_config(config: ConfigParser, config_filename: str): def store_yaml(config: dict, yaml_filename: str):
with open(config_filename, "w", encoding="utf-8") as outf: with open(yaml_filename, "w", encoding="utf-8") as outf:
config.write(outf) yaml.dump(config, outf, default_flow_style=False)

Wyświetl plik

@ -62,7 +62,7 @@ class Module:
def load_manifest(module_path): def load_manifest(module_path):
print(f"Loading manifest for module {module_path}") # print(f"Loading manifest for module {module_path}")
# load the manifest file # load the manifest file
manifest = copy.deepcopy(_DEFAULT_MANIFEST) manifest = copy.deepcopy(_DEFAULT_MANIFEST)

Wyświetl plik

@ -9,7 +9,6 @@ from typing import Generator, Union, List
from urllib.parse import urlparse from urllib.parse import urlparse
from ipaddress import ip_address from ipaddress import ip_address
import argparse import argparse
import configparser
import os import os
from os.path import join, dirname from os.path import join, dirname
@ -25,7 +24,7 @@ from ..enrichers import Enricher
from ..databases import Database from ..databases import Database
from .metadata import Metadata from .metadata import Metadata
from ..version import __version__ from ..version import __version__
from .config import read_config, store_config from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG
from .loader import available_modules, Module, MODULE_TYPES from .loader import available_modules, Module, MODULE_TYPES
import tempfile, traceback import tempfile, traceback
@ -69,24 +68,23 @@ class ArchivingOrchestrator:
parser.add_argument('-s', '--store', action='store_true', dest='store', help='Store the created config in the config file') parser.add_argument('-s', '--store', action='store_true', dest='store', help='Store the created config in the config file')
self.basic_parser = parser self.basic_parser = parser
def setup_complete_parser(self, basic_config: dict, ini_config: dict, unused_args: list[str]) -> None: def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
parents = [self.basic_parser], parents = [self.basic_parser],
add_help=False, add_help=False,
) )
self.add_steps_args(parser)
breakpoint()
# check what mode we're in # check what mode we're in
# if we have a config file, use that to decide which modules to load # if we have a config file, use that to decide which modules to load
# if simple, we'll load just the modules that has requires_setup = False # if simple, we'll load just the modules that has requires_setup = False
# if full, we'll load all modules # if full, we'll load all modules
if ini_config: if yaml_config != EMPTY_CONFIG:
# only load the modules enabled in config # only load the modules enabled in config
# TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
enabled_modules = [] enabled_modules = []
for module_type in MODULE_TYPES: for module_type in MODULE_TYPES:
try: enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
enabled_modules.extend(ini_config.get("STEPS", module_type))
except configparser.NoOptionError:
pass
# add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter' # add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter'
for module_type in MODULE_TYPES: for module_type in MODULE_TYPES:
@ -100,23 +98,25 @@ class ArchivingOrchestrator:
# add them to the config # add them to the config
for module in simple_modules: for module in simple_modules:
for module_type in module.type: for module_type in module.type:
existing_modules = config['STEPS'] = module.name yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
ini_config.setdefault(f"{module_type}s", []).append(module.name)
else: else:
# load all modules, they're not using the 'simple' mode # load all modules, they're not using the 'simple' mode
self.add_module_args(available_modules(with_manifest=True), parser) self.add_module_args(available_modules(with_manifest=True), parser)
parser.set_defaults(**ini_config) breakpoint()
parser.set_defaults(**to_dot_notation(yaml_config))
# reload the parser with the new arguments, now that we have them # reload the parser with the new arguments, now that we have them
self.config, unknown = parser.parse_known_args(unused_args) parsed, unknown = parser.parse_known_args(unused_args)
if unknown: if unknown:
logger.warning(f"Ignoring unknown/unused arguments: {unknown}") logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?")
# merge the new config with the old one
yaml_config = merge_dicts(vars(parsed), yaml_config)
if self.config and basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)): if self.config and basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)):
logger.info(f"Storing configuration file to {basic_config.config_file}") logger.info(f"Storing configuration file to {basic_config.config_file}")
store_config(ini_config, basic_config.config_file) store_yaml(yaml_config, basic_config.config_file)
breakpoint() breakpoint()
logger.info(f"FEEDER: {self.config.feeders}") logger.info(f"FEEDER: {self.config.feeders}")
logger.info(f"ENRICHERS: {self.config.enrichers}") logger.info(f"ENRICHERS: {self.config.enrichers}")
@ -179,16 +179,16 @@ class ArchivingOrchestrator:
self.show_help() self.show_help()
# load the config file # load the config file
ini_config = {} yaml_config = {}
try: if not os.path.exists(basic_config.config_file) and basic_config.config_file != DEFAULT_CONFIG_FILE:
ini_config = read_config(basic_config.config_file)
except FileNotFoundError:
if basic_config.config_file != DEFAULT_CONFIG_FILE:
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.") logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
exit() exit()
self.setup_complete_parser(basic_config, ini_config, unused_args) yaml_config = read_yaml(basic_config.config_file)
self.setup_complete_parser(basic_config, yaml_config, unused_args)
config.parse() config.parse()