kopia lustrzana https://github.com/bellingcat/auto-archiver
Validate the values read from the orchestration.yaml file, so that invalid settings entered by a user are caught and reported as well
rodzic
1d2a1d4db7
commit
27b25c5bd4
|
@ -8,6 +8,8 @@ flexible setup in various environments.
|
|||
import argparse
|
||||
from ruamel.yaml import YAML, CommentedMap, add_representer
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from copy import deepcopy
|
||||
from .module import MODULE_TYPES
|
||||
|
||||
|
@ -30,8 +32,22 @@ logging:
|
|||
""")
|
||||
# note: 'logging' is explicitly added above in order to better format the config file
|
||||
|
||||
class DefaultValidatingParser(argparse.ArgumentParser):
    """An ``ArgumentParser`` that also validates the *default* of every action.

    Stock argparse only runs its value check on values that were supplied
    on the command line. This subclass applies the same check to each
    action's default before parsing. Judging by the error message it logs,
    the defaults are presumably populated from the configuration file --
    verify against the caller.
    """

    def parse_known_args(self, args=None, namespace=None):
        """Check every action's default, then parse as usual.

        Args:
            args: argument strings to parse; forwarded unchanged to
                ``ArgumentParser.parse_known_args``.
            namespace: optional namespace to populate. Destinations that
                are already set on it are not checked, since the default
                will not be used for them.

        Returns:
            Whatever ``ArgumentParser.parse_known_args`` returns: a
            ``(namespace, remaining_args)`` tuple.

        Raises:
            SystemExit: with status 2 when a default fails the check.
        """
        for action in self._actions:
            # The default is irrelevant when the caller's namespace already
            # carries a value for this destination.
            if namespace and action.dest in namespace:
                continue
            if action.default is None:
                continue

            try:
                # NOTE: _check_value is a private argparse helper; it raises
                # ArgumentError when the value is rejected.
                self._check_value(action, action.default)
            except argparse.ArgumentError as e:
                logger.error(f"You have an invalid setting in your configuration file ({action.dest}):")
                logger.error(e)
                # Exit with a failure status (2 is what argparse itself uses
                # for errors). The bare exit() builtin would report success
                # and is only available when the `site` module is loaded.
                self.exit(2)

        return super().parse_known_args(args, namespace)
|
||||
|
||||
|
||||
def to_dot_notation(yaml_conf: CommentedMap | dict) -> dict:
|
||||
dotdict = {}
|
||||
|
||||
def process_subdict(subdict, prefix=""):
|
||||
|
|
|
@ -18,7 +18,7 @@ from .context import ArchivingContext
|
|||
|
||||
from .metadata import Metadata
|
||||
from ..version import __version__
|
||||
from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG
|
||||
from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG, DefaultValidatingParser
|
||||
from .module import available_modules, LazyBaseModule, MODULE_TYPES, get_module
|
||||
from . import validators
|
||||
from .module import BaseModule
|
||||
|
@ -56,12 +56,12 @@ class ArchivingOrchestrator:
|
|||
parser.add_argument('--mode', action='store', dest='mode', type=str, choices=['simple', 'full'], help='the mode to run the archiver in', default='simple')
|
||||
# override the default 'help' so we can inject all the configs and show those
|
||||
parser.add_argument('-h', '--help', action='store_true', dest='help', help='show this help message and exit')
|
||||
parser.add_argument('-s', '--store', dest='store', default=True, help='Store the created config in the config file', action=argparse.BooleanOptionalAction)
|
||||
parser.add_argument('-s', '--store', dest='store', default=False, help='Store the created config in the config file', action=argparse.BooleanOptionalAction)
|
||||
|
||||
self.basic_parser = parser
|
||||
|
||||
def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
parser = DefaultValidatingParser(
|
||||
add_help=False,
|
||||
)
|
||||
self.add_additional_args(parser)
|
||||
|
@ -149,6 +149,7 @@ class ArchivingOrchestrator:
|
|||
# this module has no configs, don't show anything in the help
|
||||
# (TODO: do we want to show something about this module though, like a description?)
|
||||
continue
|
||||
|
||||
group = parser.add_argument_group(module.display_name or module.name, f"{module.description[:100]}...")
|
||||
|
||||
for name, kwargs in module.configs.items():
|
||||
|
@ -157,6 +158,10 @@ class ArchivingOrchestrator:
|
|||
do_not_store = kwargs.pop('do_not_store', False)
|
||||
if do_not_store:
|
||||
self._do_not_store_keys.append((module.name, name))
|
||||
|
||||
if not kwargs.get('metavar', None):
|
||||
# make a nicer metavar, metavar is what's used in the help, e.g. --cli_feeder.urls [METAVAR]
|
||||
kwargs['metavar'] = name.upper()
|
||||
|
||||
kwargs.pop('cli_set', None)
|
||||
should_store = kwargs.pop('should_store', False)
|
||||
|
@ -248,8 +253,6 @@ class ArchivingOrchestrator:
|
|||
if basic_config.help:
|
||||
self.show_help()
|
||||
|
||||
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
||||
|
||||
# load the config file
|
||||
yaml_config = {}
|
||||
|
||||
|
@ -257,8 +260,11 @@ class ArchivingOrchestrator:
|
|||
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
|
||||
exit()
|
||||
|
||||
|
||||
yaml_config = read_yaml(basic_config.config_file)
|
||||
self.setup_complete_parser(basic_config, yaml_config, unused_args)
|
||||
|
||||
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
||||
self.install_modules()
|
||||
|
||||
# log out the modules that were loaded
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
"nargs": "+",
|
||||
"required": True,
|
||||
"do_not_store": True,
|
||||
"metavar": "INPUT URLS",
|
||||
},
|
||||
},
|
||||
"description": """
|
||||
|
|
|
@ -5,11 +5,12 @@
|
|||
"external_dependencies": {
|
||||
"python": ["loguru", "requests"],
|
||||
},
|
||||
"entry_point": "wayback_enricher::WaybackExtractorEnricher",
|
||||
"configs": {
|
||||
"timeout": {"default": 15, "help": "seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually."},
|
||||
"if_not_archived_within": {"default": None, "help": "only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA"},
|
||||
"key": {"default": None, "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php"},
|
||||
"secret": {"default": None, "help": "wayback API secret. to get credentials visit https://archive.org/account/s3.php"},
|
||||
"key": {"default": None, "required": True, "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php"},
|
||||
"secret": {"default": None, "required": True, "help": "wayback API secret. to get credentials visit https://archive.org/account/s3.php"},
|
||||
"proxy_http": {"default": None, "help": "http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port"},
|
||||
"proxy_https": {"default": None, "help": "https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port"},
|
||||
},
|
||||
|
|
Ładowanie…
Reference in New Issue