kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix loading/saving to orchestration file with comments
rodzic
f68e2726f2
commit
e3074013d0
|
@ -13,47 +13,23 @@ from .module import MODULE_TYPES
|
||||||
|
|
||||||
from typing import Any, List, Type
|
from typing import Any, List, Type
|
||||||
|
|
||||||
# configurable_parents = [
|
yaml = YAML()
|
||||||
# Feeder,
|
|
||||||
# Enricher,
|
|
||||||
# Extractor,
|
|
||||||
# Database,
|
|
||||||
# Storage,
|
|
||||||
# Formatter
|
|
||||||
# # Util
|
|
||||||
# ]
|
|
||||||
# feeder: Feeder
|
|
||||||
# formatter: Formatter
|
|
||||||
# extractors: List[Extractor] = field(default_factory=[])
|
|
||||||
# enrichers: List[Enricher] = field(default_factory=[])
|
|
||||||
# storages: List[Storage] = field(default_factory=[])
|
|
||||||
# databases: List[Database] = field(default_factory=[])
|
|
||||||
|
|
||||||
# def __init__(self) -> None:
|
EMPTY_CONFIG = yaml.load("""
|
||||||
# self.defaults = {}
|
# Auto Archiver Configuration
|
||||||
# self.cli_ops = {}
|
# Steps are the modules that will be run in the order they are defined
|
||||||
# self.config = {}
|
|
||||||
|
|
||||||
# def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
|
steps:""" + "".join([f"\n {module}s: []" for module in MODULE_TYPES]) + \
|
||||||
# """
|
"""
|
||||||
# if yaml_config_filename is provided, the --config argument is ignored,
|
|
||||||
# useful for library usage when the config values are preloaded
|
|
||||||
# overwrite_configs is a dict that overwrites the yaml file contents
|
|
||||||
# """
|
|
||||||
# # 1. parse CLI values
|
|
||||||
# if use_cli:
|
|
||||||
# parser = argparse.ArgumentParser(
|
|
||||||
# # prog = "auto-archiver",
|
|
||||||
# description="Auto Archiver is a CLI tool to archive media/metadata from online URLs; it can read URLs from many sources (Google Sheets, Command Line, ...); and write results to many destinations too (CSV, Google Sheets, MongoDB, ...)!",
|
|
||||||
# epilog="Check the code at https://github.com/bellingcat/auto-archiver"
|
|
||||||
# )
|
|
||||||
|
|
||||||
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
|
# Global configuration
|
||||||
# parser.add_argument('--version', action='version', version=__version__)
|
# These are the global configurations that are used by the modules
|
||||||
|
|
||||||
|
logging:
|
||||||
|
level: INFO
|
||||||
|
""")
|
||||||
|
# note: 'logging' is explicitly added above in order to better format the config file
|
||||||
|
|
||||||
EMPTY_CONFIG = CommentedMap(**{
|
|
||||||
"steps": dict((f"{module_type}s", []) for module_type in MODULE_TYPES)
|
|
||||||
})
|
|
||||||
|
|
||||||
def to_dot_notation(yaml_conf: CommentedMap | dict) -> argparse.ArgumentParser:
|
def to_dot_notation(yaml_conf: CommentedMap | dict) -> argparse.ArgumentParser:
|
||||||
dotdict = {}
|
dotdict = {}
|
||||||
|
@ -112,8 +88,6 @@ def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap:
|
||||||
|
|
||||||
return yaml_dict
|
return yaml_dict
|
||||||
|
|
||||||
yaml = YAML()
|
|
||||||
|
|
||||||
def read_yaml(yaml_filename: str) -> CommentedMap:
|
def read_yaml(yaml_filename: str) -> CommentedMap:
|
||||||
config = None
|
config = None
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -100,7 +100,6 @@ class LazyBaseModule:
|
||||||
name: str
|
name: str
|
||||||
display_name: str
|
display_name: str
|
||||||
type: list
|
type: list
|
||||||
requires_setup: bool
|
|
||||||
description: str
|
description: str
|
||||||
path: str
|
path: str
|
||||||
|
|
||||||
|
@ -127,6 +126,10 @@ class LazyBaseModule:
|
||||||
def configs(self):
|
def configs(self):
|
||||||
return self.manifest['configs']
|
return self.manifest['configs']
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_setup(self):
|
||||||
|
return self.manifest['requires_setup']
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def manifest(self):
|
def manifest(self):
|
||||||
if self._manifest:
|
if self._manifest:
|
||||||
|
@ -145,7 +148,6 @@ class LazyBaseModule:
|
||||||
self.display_name = manifest['name']
|
self.display_name = manifest['name']
|
||||||
self.type = manifest['type']
|
self.type = manifest['type']
|
||||||
self._entry_point = manifest['entry_point']
|
self._entry_point = manifest['entry_point']
|
||||||
self.requires_setup = manifest['requires_setup']
|
|
||||||
self.description = manifest['description']
|
self.description = manifest['description']
|
||||||
|
|
||||||
return manifest
|
return manifest
|
||||||
|
|
|
@ -168,13 +168,14 @@ class ArchivingOrchestrator:
|
||||||
# TODO: go through all the manifests and make sure we're not breaking anything with removing cli_set
|
# TODO: go through all the manifests and make sure we're not breaking anything with removing cli_set
|
||||||
# in most cases it'll mean replacing it with 'type': 'str' or 'type': 'int' or something
|
# in most cases it'll mean replacing it with 'type': 'str' or 'type': 'int' or something
|
||||||
kwargs.pop('cli_set', None)
|
kwargs.pop('cli_set', None)
|
||||||
|
should_store = kwargs.pop('should_store', False)
|
||||||
kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}"
|
kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}"
|
||||||
try:
|
try:
|
||||||
kwargs['type'] = __builtins__.get(kwargs.get('type'), str)
|
kwargs['type'] = __builtins__.get(kwargs.get('type'), str)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
kwargs['type'] = getattr(validators, kwargs['type'])
|
kwargs['type'] = getattr(validators, kwargs['type'])
|
||||||
group.add_argument(f"--{module.name}.{name}", **kwargs)
|
arg = group.add_argument(f"--{module.name}.{name}", **kwargs)
|
||||||
|
arg.should_store = should_store
|
||||||
|
|
||||||
def show_help(self):
|
def show_help(self):
|
||||||
# for the help message, we want to load *all* possible modules and show the help
|
# for the help message, we want to load *all* possible modules and show the help
|
||||||
|
|
Ładowanie…
Reference in New Issue