Allow framework for config settings that should not be stored in config (e.g. cli_feeder.urls)

Use 'do_not_store': True in the config settings to apply this. Also: fix up generic archiver dropins loading + local_storage defaults (same as what's in example orchestration)
pull/224/head
Patrick Robertson 2025-01-28 11:14:12 +01:00
rodzic 57b3bec935
commit 1d2a1d4db7
5 zmienionych plików z 35 dodań i 24 usunięć

Wyświetl plik

@ -11,7 +11,7 @@ from ruamel.yaml import YAML, CommentedMap, add_representer
from copy import deepcopy
from .module import MODULE_TYPES
from typing import Any, List, Type
from typing import Any, List, Type, Tuple
yaml = YAML()
@ -101,6 +101,15 @@ def read_yaml(yaml_filename: str) -> CommentedMap:
return config
def store_yaml(config: CommentedMap, yaml_filename: str):
# TODO: make this tidier/find a way to notify of which keys should not be stored
def store_yaml(config: CommentedMap, yaml_filename: str, do_not_store_keys: List[Tuple[str, str]] = []) -> None:
config_to_save = deepcopy(config)
for key1, key2 in do_not_store_keys:
if key1 in config_to_save and key2 in config_to_save[key1]:
del config_to_save[key1][key2]
with open(yaml_filename, "w", encoding="utf-8") as outf:
yaml.dump(config, outf)
yaml.dump(config_to_save, outf)

Wyświetl plik

@ -39,20 +39,7 @@ class UniqueAppendAction(argparse.Action):
class ArchivingOrchestrator:
# def __init__(self, config: Config) -> None:
# self.feeder: Feeder = config.feeder
# self.formatter: Formatter = config.formatter
# self.enrichers: List[Enricher] = config.enrichers
# self.archivers: List[Archiver] = config.archivers
# self.databases: List[Database] = config.databases
# self.storages: List[Storage] = config.storages
# ArchivingContext.set("storages", self.storages, keep_on_reset=True)
# try:
# for a in self.all_archivers_for_setup(): a.setup()
# except (KeyboardInterrupt, Exception) as e:
# logger.error(f"Error during setup of archivers: {e}\n{traceback.format_exc()}")
# self.cleanup()
_do_not_store_keys = []
def setup_basic_parser(self):
parser = argparse.ArgumentParser(
@ -125,10 +112,10 @@ class ArchivingOrchestrator:
if unknown:
logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?")
if (self.config != yaml_config and basic_config.store) or not os.path.isfile(basic_config.config_file):
logger.info(f"Storing configuration file to {basic_config.config_file}")
store_yaml(self.config, basic_config.config_file)
store_yaml(self.config, basic_config.config_file, self._do_not_store_keys)
return self.config
@ -167,6 +154,10 @@ class ArchivingOrchestrator:
for name, kwargs in module.configs.items():
# TODO: go through all the manifests and make sure we're not breaking anything with removing cli_set
# in most cases it'll mean replacing it with 'type': 'str' or 'type': 'int' or something
do_not_store = kwargs.pop('do_not_store', False)
if do_not_store:
self._do_not_store_keys.append((module.name, name))
kwargs.pop('cli_set', None)
should_store = kwargs.pop('should_store', False)
kwargs['dest'] = f"{module.name}.{kwargs.pop('dest', name)}"
@ -193,7 +184,7 @@ class ArchivingOrchestrator:
logging_config = self.config['logging']
logger.add(sys.stderr, level=logging_config['level'])
if log_file := logging_config['file']:
logger.add(log_file, rotation=logging_config['logging.rotation'])
logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
def install_modules(self):
@ -221,7 +212,14 @@ class ArchivingOrchestrator:
if module in invalid_modules:
continue
loaded_module: BaseModule = get_module(module).load()
loaded_module.setup(self.config)
try:
loaded_module.setup(self.config)
except (KeyboardInterrupt, Exception) as e:
logger.error(f"Error during setup of archivers: {e}\n{traceback.format_exc()}")
if module_type == 'extractor':
loaded_module.cleanup()
exit()
if not loaded_module:
invalid_modules.append(module)
continue

Wyświetl plik

@ -11,6 +11,7 @@
"help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
"nargs": "+",
"required": True,
"do_not_store": True,
},
},
"description": """

Wyświetl plik

@ -121,6 +121,7 @@ class GenericExtractor(Extractor):
ie_instance = info_extractor(downloader=ydl)
dropin = self.dropin_for_name(info_extractor.ie_key())
if not dropin:
# TODO: add a proper link to 'how to create your own dropin'
logger.debug(f"""Could not find valid dropin for {info_extractor.IE_NAME}.
@ -172,6 +173,8 @@ class GenericExtractor(Extractor):
def dropin_for_name(self, dropin_name: str, additional_paths = [], package=__package__) -> Type[InfoExtractor]:
dropin_name = dropin_name.lower()
if dropin_name == "generic":
# no need for a dropin for the generic extractor (?)
return None

Wyświetl plik

@ -7,16 +7,16 @@
},
"configs": {
"path_generator": {
"default": "url",
"default": "flat",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
"choices": ["flat", "url", "random"],
},
"filename_generator": {
"default": "random",
"default": "static",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"choices": ["random", "static"],
},
"save_to": {"default": "./archived", "help": "folder where to save archived content"},
"save_to": {"default": "./local_archive", "help": "folder where to save archived content"},
"save_absolute": {"default": False, "help": "whether the path to the stored file is absolute or relative in the output result inc. formatters (WARN: leaks the file structure)"},
},
"description": """