kopia lustrzana https://github.com/bellingcat/auto-archiver
Switch back to using yaml with dot notation
(two simple helper functions to convert between dot and dict notation) pull/183/head
rodzic
54995ad6ab
commit
b6b085854c
|
@ -4,10 +4,13 @@ It supports CLI argument parsing, loading from YAML file, and overrides to allow
|
|||
flexible setup in various environments.
|
||||
|
||||
"""
|
||||
import argparse
|
||||
from configparser import ConfigParser
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import argparse
|
||||
import yaml
|
||||
from dataclasses import dataclass, field
|
||||
from collections import OrderedDict
|
||||
|
||||
from .loader import MODULE_TYPES
|
||||
|
||||
# configurable_parents = [
|
||||
# Feeder,
|
||||
|
@ -47,21 +50,56 @@ from dataclasses import dataclass, field
|
|||
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
|
||||
# parser.add_argument('--version', action='version', version=__version__)
|
||||
|
||||
|
||||
# Skeleton configuration: one empty list per pluralised module type, all
# nested under the "steps" key.
EMPTY_CONFIG = {
    "steps": {f"{module_type}s": [] for module_type in MODULE_TYPES},
}
|
||||
class LoadFromFile(argparse.Action):
    """argparse action that reads further arguments from a file-like value.

    The option's value is used as a context manager exposing ``read()``; its
    contents are split on whitespace and parsed by the same parser into the
    namespace that is currently being populated.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        with values as arg_file:
            # Every whitespace-separated token in the file is one argument.
            file_args = arg_file.read().split()
            parser.parse_args(file_args, namespace)
|
||||
|
||||
def read_config(config_filename: str) -> dict:
|
||||
config = ConfigParser()
|
||||
config.read(config_filename)
|
||||
# setup basic format
|
||||
if 'STEPS' not in config.sections():
|
||||
config.add_section("STEPS")
|
||||
def to_dot_notation(yaml_conf: dict) -> dict:
    """Flatten a nested configuration dict into dot-notation keys.

    Nested dictionaries are collapsed so that ``{"a": {"b": 1}}`` becomes
    ``{"a.b": 1}``. The ``steps`` section is handled separately: each
    non-empty step list is lifted to a top-level key named after the step in
    plural form (``feeder`` and ``feeders`` both map to ``feeders``).

    Args:
        yaml_conf: The nested configuration. It is not modified.

    Returns:
        A new flat dict keyed by dotted paths.
    """
    dotdict = {}

    # Read 'steps' without popping it so the caller's config keeps the
    # section; a missing or null section is treated as empty.
    for step, vals in (yaml_conf.get("steps") or {}).items():
        if vals:
            name = str(step)
            # Step keys may already be plural; do not append a second 's'.
            dotdict[name if name.endswith("s") else f"{name}s"] = vals

    def process_subdict(subdict, prefix=""):
        for key, value in subdict.items():
            if isinstance(value, dict):
                process_subdict(value, f"{prefix}{key}.")
            else:
                dotdict[f"{prefix}{key}"] = value

    # 'steps' was handled above, so it is left out of the generic flattening.
    process_subdict({k: v for k, v in yaml_conf.items() if k != "steps"})
    return dotdict
|
||||
|
||||
def merge_dicts(dotdict, yaml_dict):
    """Write dot-notation entries into a nested dict, in place.

    Each key of ``dotdict`` is split on ``"."``; every segment but the last
    names a nested dictionary inside ``yaml_dict`` (created when absent) and
    the last segment receives the value. Keys without a dot are assigned at
    the top level. ``yaml_dict`` is mutated and also returned.
    """
    for dotted_key, value in dotdict.items():
        *parents, leaf = dotted_key.split(".")
        target = yaml_dict
        for part in parents:
            target = target.setdefault(part, {})
        target[leaf] = value
    return yaml_dict
|
||||
|
||||
def read_yaml(yaml_filename: str) -> dict:
    """Load the YAML configuration stored at ``yaml_filename``.

    Args:
        yaml_filename: Path of the YAML file to read (opened as UTF-8).

    Returns:
        The parsed document. When the file does not exist, or parses to
        nothing (``yaml.safe_load`` yields ``None`` for an empty document),
        a fresh copy of ``EMPTY_CONFIG`` is returned instead.
    """
    # Local import keeps this function self-contained.
    import copy

    try:
        with open(yaml_filename, "r", encoding="utf-8") as inf:
            config = yaml.safe_load(inf)
    except FileNotFoundError:
        config = None

    if config is None:
        # Hand out a copy: the result may be mutated by callers, and the
        # module-level EMPTY_CONFIG must stay pristine for later comparisons.
        config = copy.deepcopy(EMPTY_CONFIG)
    return config
|
||||
|
||||
def store_config(config: ConfigParser, config_filename: str):
    """Write ``config`` to ``config_filename``, replacing any existing file.

    The file is opened in text mode with UTF-8 encoding and filled by the
    parser's own ``write`` method.
    """
    with open(config_filename, "w", encoding="utf-8") as ini_file:
        config.write(ini_file)
|
||||
def store_yaml(config: dict, yaml_filename: str):
    """Serialise ``config`` as YAML into ``yaml_filename``.

    The target is opened for writing as UTF-8 (overwriting existing content)
    and the document is dumped with ``default_flow_style=False``.
    """
    with open(yaml_filename, "w", encoding="utf-8") as yaml_file:
        yaml.dump(config, yaml_file, default_flow_style=False)
|
|
@ -62,7 +62,7 @@ class Module:
|
|||
|
||||
|
||||
def load_manifest(module_path):
|
||||
print(f"Loading manifest for module {module_path}")
|
||||
# print(f"Loading manifest for module {module_path}")
|
||||
# load the manifest file
|
||||
manifest = copy.deepcopy(_DEFAULT_MANIFEST)
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@ from typing import Generator, Union, List
|
|||
from urllib.parse import urlparse
|
||||
from ipaddress import ip_address
|
||||
import argparse
|
||||
import configparser
|
||||
import os
|
||||
from os.path import join, dirname
|
||||
|
||||
|
@ -25,7 +24,7 @@ from ..enrichers import Enricher
|
|||
from ..databases import Database
|
||||
from .metadata import Metadata
|
||||
from ..version import __version__
|
||||
from .config import read_config, store_config
|
||||
from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG
|
||||
from .loader import available_modules, Module, MODULE_TYPES
|
||||
|
||||
import tempfile, traceback
|
||||
|
@ -69,24 +68,23 @@ class ArchivingOrchestrator:
|
|||
parser.add_argument('-s', '--store', action='store_true', dest='store', help='Store the created config in the config file')
|
||||
self.basic_parser = parser
|
||||
|
||||
def setup_complete_parser(self, basic_config: dict, ini_config: dict, unused_args: list[str]) -> None:
|
||||
def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
parents = [self.basic_parser],
|
||||
add_help=False,
|
||||
)
|
||||
|
||||
self.add_steps_args(parser)
|
||||
breakpoint()
|
||||
# check what mode we're in
|
||||
# if we have a config file, use that to decide which modules to load
|
||||
# if simple, we'll load just the modules that has requires_setup = False
|
||||
# if full, we'll load all modules
|
||||
if ini_config:
|
||||
if yaml_config != EMPTY_CONFIG:
|
||||
# only load the modules enabled in config
|
||||
# TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
|
||||
enabled_modules = []
|
||||
for module_type in MODULE_TYPES:
|
||||
try:
|
||||
enabled_modules.extend(ini_config.get("STEPS", module_type))
|
||||
except configparser.NoOptionError:
|
||||
pass
|
||||
enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
|
||||
|
||||
# add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter'
|
||||
for module_type in MODULE_TYPES:
|
||||
|
@ -100,23 +98,25 @@ class ArchivingOrchestrator:
|
|||
# add them to the config
|
||||
for module in simple_modules:
|
||||
for module_type in module.type:
|
||||
existing_modules = config['STEPS'] = module.name
|
||||
ini_config.setdefault(f"{module_type}s", []).append(module.name)
|
||||
|
||||
yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
|
||||
else:
|
||||
# load all modules, they're not using the 'simple' mode
|
||||
self.add_module_args(available_modules(with_manifest=True), parser)
|
||||
|
||||
parser.set_defaults(**ini_config)
|
||||
|
||||
breakpoint()
|
||||
parser.set_defaults(**to_dot_notation(yaml_config))
|
||||
|
||||
# reload the parser with the new arguments, now that we have them
|
||||
self.config, unknown = parser.parse_known_args(unused_args)
|
||||
parsed, unknown = parser.parse_known_args(unused_args)
|
||||
if unknown:
|
||||
logger.warning(f"Ignoring unknown/unused arguments: {unknown}")
|
||||
logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?")
|
||||
|
||||
# merge the new config with the old one
|
||||
yaml_config = merge_dicts(vars(parsed), yaml_config)
|
||||
|
||||
if self.config and basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)):
|
||||
logger.info(f"Storing configuration file to {basic_config.config_file}")
|
||||
store_config(ini_config, basic_config.config_file)
|
||||
store_yaml(yaml_config, basic_config.config_file)
|
||||
breakpoint()
|
||||
logger.info(f"FEEDER: {self.config.feeders}")
|
||||
logger.info(f"ENRICHERS: {self.config.enrichers}")
|
||||
|
@ -179,16 +179,16 @@ class ArchivingOrchestrator:
|
|||
self.show_help()
|
||||
|
||||
# load the config file
|
||||
ini_config = {}
|
||||
yaml_config = {}
|
||||
|
||||
try:
|
||||
ini_config = read_config(basic_config.config_file)
|
||||
except FileNotFoundError:
|
||||
if basic_config.config_file != DEFAULT_CONFIG_FILE:
|
||||
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
|
||||
exit()
|
||||
if not os.path.exists(basic_config.config_file) and basic_config.config_file != DEFAULT_CONFIG_FILE:
|
||||
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
|
||||
exit()
|
||||
|
||||
self.setup_complete_parser(basic_config, ini_config, unused_args)
|
||||
yaml_config = read_yaml(basic_config.config_file)
|
||||
|
||||
|
||||
self.setup_complete_parser(basic_config, yaml_config, unused_args)
|
||||
|
||||
config.parse()
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue