kopia lustrzana https://github.com/bellingcat/auto-archiver
Validate orchestration.yaml file inputs - so if a user enters invalid values, it also validates them
rodzic
1d2a1d4db7
commit
27b25c5bd4
|
@ -8,6 +8,8 @@ flexible setup in various environments.
|
||||||
import argparse
|
import argparse
|
||||||
from ruamel.yaml import YAML, CommentedMap, add_representer
|
from ruamel.yaml import YAML, CommentedMap, add_representer
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from .module import MODULE_TYPES
|
from .module import MODULE_TYPES
|
||||||
|
|
||||||
|
@ -30,8 +32,22 @@ logging:
|
||||||
""")
|
""")
|
||||||
# note: 'logging' is explicitly added above in order to better format the config file
|
# note: 'logging' is explicitly added above in order to better format the config file
|
||||||
|
|
||||||
|
class DefaultValidatingParser(argparse.ArgumentParser):
|
||||||
|
def parse_known_args(self, args=None, namespace=None):
|
||||||
|
for action in self._actions:
|
||||||
|
if not namespace or action.dest not in namespace:
|
||||||
|
if action.default is not None:
|
||||||
|
try:
|
||||||
|
self._check_value(action, action.default)
|
||||||
|
except argparse.ArgumentError as e:
|
||||||
|
logger.error(f"You have an invalid setting in your configuration file ({action.dest}):")
|
||||||
|
logger.error(e)
|
||||||
|
exit()
|
||||||
|
|
||||||
def to_dot_notation(yaml_conf: CommentedMap | dict) -> argparse.ArgumentParser:
|
return super().parse_known_args(args, namespace)
|
||||||
|
|
||||||
|
|
||||||
|
def to_dot_notation(yaml_conf: CommentedMap | dict) -> dict:
|
||||||
dotdict = {}
|
dotdict = {}
|
||||||
|
|
||||||
def process_subdict(subdict, prefix=""):
|
def process_subdict(subdict, prefix=""):
|
||||||
|
|
|
@ -18,7 +18,7 @@ from .context import ArchivingContext
|
||||||
|
|
||||||
from .metadata import Metadata
|
from .metadata import Metadata
|
||||||
from ..version import __version__
|
from ..version import __version__
|
||||||
from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG
|
from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG, DefaultValidatingParser
|
||||||
from .module import available_modules, LazyBaseModule, MODULE_TYPES, get_module
|
from .module import available_modules, LazyBaseModule, MODULE_TYPES, get_module
|
||||||
from . import validators
|
from . import validators
|
||||||
from .module import BaseModule
|
from .module import BaseModule
|
||||||
|
@ -56,12 +56,12 @@ class ArchivingOrchestrator:
|
||||||
parser.add_argument('--mode', action='store', dest='mode', type=str, choices=['simple', 'full'], help='the mode to run the archiver in', default='simple')
|
parser.add_argument('--mode', action='store', dest='mode', type=str, choices=['simple', 'full'], help='the mode to run the archiver in', default='simple')
|
||||||
# override the default 'help' so we can inject all the configs and show those
|
# override the default 'help' so we can inject all the configs and show those
|
||||||
parser.add_argument('-h', '--help', action='store_true', dest='help', help='show this help message and exit')
|
parser.add_argument('-h', '--help', action='store_true', dest='help', help='show this help message and exit')
|
||||||
parser.add_argument('-s', '--store', dest='store', default=True, help='Store the created config in the config file', action=argparse.BooleanOptionalAction)
|
parser.add_argument('-s', '--store', dest='store', default=False, help='Store the created config in the config file', action=argparse.BooleanOptionalAction)
|
||||||
|
|
||||||
self.basic_parser = parser
|
self.basic_parser = parser
|
||||||
|
|
||||||
def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
|
def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
|
||||||
parser = argparse.ArgumentParser(
|
parser = DefaultValidatingParser(
|
||||||
add_help=False,
|
add_help=False,
|
||||||
)
|
)
|
||||||
self.add_additional_args(parser)
|
self.add_additional_args(parser)
|
||||||
|
@ -149,6 +149,7 @@ class ArchivingOrchestrator:
|
||||||
# this module has no configs, don't show anything in the help
|
# this module has no configs, don't show anything in the help
|
||||||
# (TODO: do we want to show something about this module though, like a description?)
|
# (TODO: do we want to show something about this module though, like a description?)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
group = parser.add_argument_group(module.display_name or module.name, f"{module.description[:100]}...")
|
group = parser.add_argument_group(module.display_name or module.name, f"{module.description[:100]}...")
|
||||||
|
|
||||||
for name, kwargs in module.configs.items():
|
for name, kwargs in module.configs.items():
|
||||||
|
@ -158,6 +159,10 @@ class ArchivingOrchestrator:
|
||||||
if do_not_store:
|
if do_not_store:
|
||||||
self._do_not_store_keys.append((module.name, name))
|
self._do_not_store_keys.append((module.name, name))
|
||||||
|
|
||||||
|
if not kwargs.get('metavar', None):
|
||||||
|
# make a nicer metavar, metavar is what's used in the help, e.g. --cli_feeder.urls [METAVAR]
|
||||||
|
kwargs['metavar'] = name.upper()
|
||||||
|
|
||||||
kwargs.pop('cli_set', None)
|
kwargs.pop('cli_set', None)
|
||||||
should_store = kwargs.pop('should_store', False)
|
should_store = kwargs.pop('should_store', False)
|
||||||
kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}"
|
kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}"
|
||||||
|
@ -248,8 +253,6 @@ class ArchivingOrchestrator:
|
||||||
if basic_config.help:
|
if basic_config.help:
|
||||||
self.show_help()
|
self.show_help()
|
||||||
|
|
||||||
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
|
||||||
|
|
||||||
# load the config file
|
# load the config file
|
||||||
yaml_config = {}
|
yaml_config = {}
|
||||||
|
|
||||||
|
@ -257,8 +260,11 @@ class ArchivingOrchestrator:
|
||||||
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
|
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
|
||||||
yaml_config = read_yaml(basic_config.config_file)
|
yaml_config = read_yaml(basic_config.config_file)
|
||||||
self.setup_complete_parser(basic_config, yaml_config, unused_args)
|
self.setup_complete_parser(basic_config, yaml_config, unused_args)
|
||||||
|
|
||||||
|
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
||||||
self.install_modules()
|
self.install_modules()
|
||||||
|
|
||||||
# log out the modules that were loaded
|
# log out the modules that were loaded
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
"nargs": "+",
|
"nargs": "+",
|
||||||
"required": True,
|
"required": True,
|
||||||
"do_not_store": True,
|
"do_not_store": True,
|
||||||
|
"metavar": "INPUT URLS",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"description": """
|
"description": """
|
||||||
|
|
|
@ -5,11 +5,12 @@
|
||||||
"external_dependencies": {
|
"external_dependencies": {
|
||||||
"python": ["loguru", "requests"],
|
"python": ["loguru", "requests"],
|
||||||
},
|
},
|
||||||
|
"entry_point": "wayback_enricher::WaybackExtractorEnricher",
|
||||||
"configs": {
|
"configs": {
|
||||||
"timeout": {"default": 15, "help": "seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually."},
|
"timeout": {"default": 15, "help": "seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually."},
|
||||||
"if_not_archived_within": {"default": None, "help": "only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA"},
|
"if_not_archived_within": {"default": None, "help": "only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA"},
|
||||||
"key": {"default": None, "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php"},
|
"key": {"default": None, "required": True, "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php"},
|
||||||
"secret": {"default": None, "help": "wayback API secret. to get credentials visit https://archive.org/account/s3.php"},
|
"secret": {"default": None, "required": True, "help": "wayback API secret. to get credentials visit https://archive.org/account/s3.php"},
|
||||||
"proxy_http": {"default": None, "help": "http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port"},
|
"proxy_http": {"default": None, "help": "http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port"},
|
||||||
"proxy_https": {"default": None, "help": "https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port"},
|
"proxy_https": {"default": None, "help": "https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port"},
|
||||||
},
|
},
|
||||||
|
|
Ładowanie…
Reference in New Issue