kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix loading/saving to orchestration file with comments
rodzic
f68e2726f2
commit
e3074013d0
|
@ -13,47 +13,23 @@ from .module import MODULE_TYPES
|
|||
|
||||
from typing import Any, List, Type
|
||||
|
||||
# configurable_parents = [
|
||||
# Feeder,
|
||||
# Enricher,
|
||||
# Extractor,
|
||||
# Database,
|
||||
# Storage,
|
||||
# Formatter
|
||||
# # Util
|
||||
# ]
|
||||
# feeder: Feeder
|
||||
# formatter: Formatter
|
||||
# extractors: List[Extractor] = field(default_factory=[])
|
||||
# enrichers: List[Enricher] = field(default_factory=[])
|
||||
# storages: List[Storage] = field(default_factory=[])
|
||||
# databases: List[Database] = field(default_factory=[])
|
||||
yaml = YAML()
|
||||
|
||||
# def __init__(self) -> None:
|
||||
# self.defaults = {}
|
||||
# self.cli_ops = {}
|
||||
# self.config = {}
|
||||
EMPTY_CONFIG = yaml.load("""
|
||||
# Auto Archiver Configuration
|
||||
# Steps are the modules that will be run in the order they are defined
|
||||
|
||||
# def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
|
||||
# """
|
||||
# if yaml_config_filename is provided, the --config argument is ignored,
|
||||
# useful for library usage when the config values are preloaded
|
||||
# overwrite_configs is a dict that overwrites the yaml file contents
|
||||
# """
|
||||
# # 1. parse CLI values
|
||||
# if use_cli:
|
||||
# parser = argparse.ArgumentParser(
|
||||
# # prog = "auto-archiver",
|
||||
# description="Auto Archiver is a CLI tool to archive media/metadata from online URLs; it can read URLs from many sources (Google Sheets, Command Line, ...); and write results to many destinations too (CSV, Google Sheets, MongoDB, ...)!",
|
||||
# epilog="Check the code at https://github.com/bellingcat/auto-archiver"
|
||||
# )
|
||||
steps:""" + "".join([f"\n {module}s: []" for module in MODULE_TYPES]) + \
|
||||
"""
|
||||
|
||||
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
|
||||
# parser.add_argument('--version', action='version', version=__version__)
|
||||
# Global configuration
|
||||
# These are the global configurations that are used by the modules
|
||||
|
||||
logging:
|
||||
level: INFO
|
||||
""")
|
||||
# note: 'logging' is explicitly added above in order to better format the config file
|
||||
|
||||
EMPTY_CONFIG = CommentedMap(**{
|
||||
"steps": dict((f"{module_type}s", []) for module_type in MODULE_TYPES)
|
||||
})
|
||||
|
||||
def to_dot_notation(yaml_conf: CommentedMap | dict) -> argparse.ArgumentParser:
|
||||
dotdict = {}
|
||||
|
@ -112,8 +88,6 @@ def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap:
|
|||
|
||||
return yaml_dict
|
||||
|
||||
yaml = YAML()
|
||||
|
||||
def read_yaml(yaml_filename: str) -> CommentedMap:
|
||||
config = None
|
||||
try:
|
||||
|
|
|
@ -100,7 +100,6 @@ class LazyBaseModule:
|
|||
name: str
|
||||
display_name: str
|
||||
type: list
|
||||
requires_setup: bool
|
||||
description: str
|
||||
path: str
|
||||
|
||||
|
@ -111,7 +110,7 @@ class LazyBaseModule:
|
|||
def __init__(self, module_name, path):
|
||||
self.name = module_name
|
||||
self.path = path
|
||||
|
||||
|
||||
@property
|
||||
def entry_point(self):
|
||||
if not self._entry_point and not self.manifest['entry_point']:
|
||||
|
@ -126,6 +125,10 @@ class LazyBaseModule:
|
|||
@property
|
||||
def configs(self):
|
||||
return self.manifest['configs']
|
||||
|
||||
@property
|
||||
def requires_setup(self):
|
||||
return self.manifest['requires_setup']
|
||||
|
||||
@property
|
||||
def manifest(self):
|
||||
|
@ -145,7 +148,6 @@ class LazyBaseModule:
|
|||
self.display_name = manifest['name']
|
||||
self.type = manifest['type']
|
||||
self._entry_point = manifest['entry_point']
|
||||
self.requires_setup = manifest['requires_setup']
|
||||
self.description = manifest['description']
|
||||
|
||||
return manifest
|
||||
|
|
|
@ -168,13 +168,14 @@ class ArchivingOrchestrator:
|
|||
# TODO: go through all the manifests and make sure we're not breaking anything with removing cli_set
|
||||
# in most cases it'll mean replacing it with 'type': 'str' or 'type': 'int' or something
|
||||
kwargs.pop('cli_set', None)
|
||||
|
||||
should_store = kwargs.pop('should_store', False)
|
||||
kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}"
|
||||
try:
|
||||
kwargs['type'] = __builtins__.get(kwargs.get('type'), str)
|
||||
except KeyError:
|
||||
kwargs['type'] = getattr(validators, kwargs['type'])
|
||||
group.add_argument(f"--{module.name}.{name}", **kwargs)
|
||||
arg = group.add_argument(f"--{module.name}.{name}", **kwargs)
|
||||
arg.should_store = should_store
|
||||
|
||||
def show_help(self):
|
||||
# for the help message, we want to load *all* possible modules and show the help
|
||||
|
@ -255,7 +256,7 @@ class ArchivingOrchestrator:
|
|||
exit()
|
||||
|
||||
yaml_config = read_yaml(basic_config.config_file)
|
||||
|
||||
|
||||
|
||||
self.setup_complete_parser(basic_config, yaml_config, unused_args)
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue