kopia lustrzana https://github.com/bellingcat/auto-archiver
Merge branch 'load_modules' into more_mainifests
# Conflicts: # src/auto_archiver/databases/__init__.py
pull/183/head
commit
c517d35bdf
|
@ -1,6 +0,0 @@
|
||||||
from . import archivers, databases, enrichers, feeders, formatters, storages, utils, core
|
|
||||||
|
|
||||||
# need to manually specify due to cyclical deps
|
|
||||||
from .core.orchestrator import ArchivingOrchestrator
|
|
||||||
# making accessible directly
|
|
||||||
from .core.metadata import Metadata
|
|
|
@ -1,6 +1,5 @@
|
||||||
""" Entry point for the auto_archiver package. """
|
""" Entry point for the auto_archiver package. """
|
||||||
from . import ArchivingOrchestrator
|
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
ArchivingOrchestrator().run()
|
ArchivingOrchestrator().run()
|
||||||
|
|
|
@ -1,10 +1,6 @@
|
||||||
""" Core modules to handle things such as orchestration, metadata and configs..
|
""" Core modules to handle things such as orchestration, metadata and configs..
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from .metadata import Metadata
|
|
||||||
from .media import Media
|
|
||||||
from .step import Step
|
|
||||||
from .context import ArchivingContext
|
|
||||||
|
|
||||||
# cannot import ArchivingOrchestrator/Config to avoid circular dep
|
# cannot import ArchivingOrchestrator/Config to avoid circular dep
|
||||||
# from .orchestrator import ArchivingOrchestrator
|
# from .orchestrator import ArchivingOrchestrator
|
||||||
|
|
|
@ -4,10 +4,13 @@ It supports CLI argument parsing, loading from YAML file, and overrides to allow
|
||||||
flexible setup in various environments.
|
flexible setup in various environments.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import argparse
|
|
||||||
from configparser import ConfigParser
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import yaml
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
from .loader import MODULE_TYPES
|
||||||
|
|
||||||
# configurable_parents = [
|
# configurable_parents = [
|
||||||
# Feeder,
|
# Feeder,
|
||||||
|
@ -47,21 +50,53 @@ from dataclasses import dataclass, field
|
||||||
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
|
# parser.add_argument('--config', action='store', dest='config', help='the filename of the YAML configuration file (defaults to \'config.yaml\')', default='orchestration.yaml')
|
||||||
# parser.add_argument('--version', action='version', version=__version__)
|
# parser.add_argument('--version', action='version', version=__version__)
|
||||||
|
|
||||||
|
EMPTY_CONFIG = {
|
||||||
|
"steps": dict((f"{module_type}s", []) for module_type in MODULE_TYPES)
|
||||||
|
}
|
||||||
class LoadFromFile (argparse.Action):
|
class LoadFromFile (argparse.Action):
|
||||||
def __call__ (self, parser, namespace, values, option_string = None):
|
def __call__ (self, parser, namespace, values, option_string = None):
|
||||||
with values as f:
|
with values as f:
|
||||||
# parse arguments in the file and store them in the target namespace
|
# parse arguments in the file and store them in the target namespace
|
||||||
parser.parse_args(f.read().split(), namespace)
|
parser.parse_args(f.read().split(), namespace)
|
||||||
|
|
||||||
def read_config(config_filename: str) -> dict:
|
def to_dot_notation(yaml_conf: str) -> argparse.ArgumentParser:
|
||||||
config = ConfigParser()
|
dotdict = {}
|
||||||
config.read(config_filename)
|
|
||||||
# setup basic format
|
def process_subdict(subdict, prefix=""):
|
||||||
if 'STEPS' not in config.sections():
|
for key, value in subdict.items():
|
||||||
config.add_section("STEPS")
|
if type(value) == dict:
|
||||||
|
process_subdict(value, f"{prefix}{key}.")
|
||||||
|
else:
|
||||||
|
dotdict[f"{prefix}{key}"] = value
|
||||||
|
|
||||||
|
process_subdict(yaml_conf)
|
||||||
|
return dotdict
|
||||||
|
|
||||||
|
def merge_dicts(dotdict, yaml_dict):
|
||||||
|
def process_subdict(subdict, prefix=""):
|
||||||
|
for key, value in subdict.items():
|
||||||
|
if "." in key:
|
||||||
|
keys = key.split(".")
|
||||||
|
subdict = yaml_dict
|
||||||
|
for k in keys[:-1]:
|
||||||
|
subdict = subdict.setdefault(k, {})
|
||||||
|
subdict[keys[-1]] = value
|
||||||
|
else:
|
||||||
|
yaml_dict[key] = value
|
||||||
|
|
||||||
|
process_subdict(dotdict)
|
||||||
|
return yaml_dict
|
||||||
|
|
||||||
|
def read_yaml(yaml_filename: str) -> dict:
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(yaml_filename, "r", encoding="utf-8") as inf:
|
||||||
|
config = yaml.safe_load(inf)
|
||||||
|
except FileNotFoundError:
|
||||||
|
config = EMPTY_CONFIG
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
def store_config(config: ConfigParser, config_filename: str):
|
def store_yaml(config: dict, yaml_filename: str):
|
||||||
with open(config_filename, "w", encoding="utf-8") as outf:
|
with open(yaml_filename, "w", encoding="utf-8") as outf:
|
||||||
config.write(outf)
|
yaml.dump(config, outf, default_flow_style=False)
|
|
@ -4,12 +4,14 @@ import os
|
||||||
import copy
|
import copy
|
||||||
from os.path import join, dirname
|
from os.path import join, dirname
|
||||||
from typing import List
|
from typing import List
|
||||||
|
from loguru import logger
|
||||||
|
import sys
|
||||||
|
import shutil
|
||||||
|
|
||||||
MODULE_TYPES = [
|
MODULE_TYPES = [
|
||||||
'feeder',
|
'feeder',
|
||||||
'enricher',
|
'enricher',
|
||||||
'archiver',
|
'extractor',
|
||||||
'database',
|
'database',
|
||||||
'storage',
|
'storage',
|
||||||
'formatter'
|
'formatter'
|
||||||
|
@ -59,10 +61,47 @@ class Module:
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"Module<'{self.display_name}' ({self.name})>"
|
return f"Module<'{self.display_name}' ({self.name})>"
|
||||||
|
|
||||||
|
def load_modules(modules):
|
||||||
|
modules = available_modules(limit_to_modules=modules, with_manifest=True)
|
||||||
|
for module in modules:
|
||||||
|
_load_module(module)
|
||||||
|
|
||||||
|
def _load_module(module):
|
||||||
|
# first make sure that the 'depends' are installed and available in sys.modules
|
||||||
|
for dependency in module.depends:
|
||||||
|
if dependency not in sys.modules:
|
||||||
|
logger.error(f"""
|
||||||
|
Module {module.name} depends on {dependency} which is not available.
|
||||||
|
|
||||||
|
Have you set up the '{module.name}' module correctly? See the README for more information.
|
||||||
|
""")
|
||||||
|
exit()
|
||||||
|
# then check the external dependencies: 'python' dependencies are looked up in sys.modules, 'binary' dependencies should be available on the path
|
||||||
|
for dep_type, deps in module.external_dependencies.items():
|
||||||
|
if dep_type == 'python':
|
||||||
|
for dep in deps:
|
||||||
|
if dep not in sys.modules:
|
||||||
|
logger.error(f"""
|
||||||
|
Module {module.name} requires {dep} which is not available.
|
||||||
|
|
||||||
|
Have you installed the required dependencies for the '{module.name}' module? See the README for more information.
|
||||||
|
""")
|
||||||
|
|
||||||
|
elif dep_type == 'binary':
|
||||||
|
for dep in deps:
|
||||||
|
if not shutil.which(dep):
|
||||||
|
logger.error(f"""
|
||||||
|
Module {module.name} requires {dep} which is not available.
|
||||||
|
|
||||||
|
Have you installed the required dependencies for the '{module.name}' module? See the README for more information.
|
||||||
|
""")
|
||||||
|
# finally, load the module
|
||||||
|
logger.info(f"Loading module {module.display_name}")
|
||||||
|
module = __import__(module.entry_point, fromlist=[module.entry_point])
|
||||||
|
logger.info(f"Module {module.display_name} loaded")
|
||||||
|
|
||||||
def load_manifest(module_path):
|
def load_manifest(module_path):
|
||||||
print(f"Loading manifest for module {module_path}")
|
# print(f"Loading manifest for module {module_path}")
|
||||||
# load the manifest file
|
# load the manifest file
|
||||||
manifest = copy.deepcopy(_DEFAULT_MANIFEST)
|
manifest = copy.deepcopy(_DEFAULT_MANIFEST)
|
||||||
|
|
||||||
|
@ -70,7 +109,7 @@ def load_manifest(module_path):
|
||||||
manifest.update(ast.literal_eval(f.read()))
|
manifest.update(ast.literal_eval(f.read()))
|
||||||
return manifest
|
return manifest
|
||||||
|
|
||||||
def available_modules(additional_paths: List[str] = [], with_manifest: bool=False) -> List[Module]:
|
def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= [], additional_paths: List[str] = [], ) -> List[Module]:
|
||||||
# search through all valid 'modules' paths. Default is 'modules' in the current directory
|
# search through all valid 'modules' paths. Default is 'modules' in the current directory
|
||||||
|
|
||||||
# see odoo/modules/module.py -> get_modules
|
# see odoo/modules/module.py -> get_modules
|
||||||
|
@ -83,7 +122,16 @@ def available_modules(additional_paths: List[str] = [], with_manifest: bool=Fals
|
||||||
|
|
||||||
for module_folder in default_path + additional_paths:
|
for module_folder in default_path + additional_paths:
|
||||||
# walk through each module in module_folder and check if it has a valid manifest
|
# walk through each module in module_folder and check if it has a valid manifest
|
||||||
for possible_module in os.listdir(module_folder):
|
try:
|
||||||
|
possible_modules = os.listdir(module_folder)
|
||||||
|
except FileNotFoundError:
|
||||||
|
logger.warning(f"Module folder {module_folder} does not exist")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for possible_module in possible_modules:
|
||||||
|
if limit_to_modules and possible_module not in limit_to_modules:
|
||||||
|
continue
|
||||||
|
|
||||||
possible_module_path = join(module_folder, possible_module)
|
possible_module_path = join(module_folder, possible_module)
|
||||||
if not is_really_module(possible_module_path):
|
if not is_really_module(possible_module_path):
|
||||||
continue
|
continue
|
||||||
|
@ -94,4 +142,8 @@ def available_modules(additional_paths: List[str] = [], with_manifest: bool=Fals
|
||||||
manifest = {}
|
manifest = {}
|
||||||
all_modules.append(Module(possible_module, possible_module_path, manifest))
|
all_modules.append(Module(possible_module, possible_module_path, manifest))
|
||||||
|
|
||||||
|
for module in limit_to_modules:
|
||||||
|
if not any(module == m.name for m in all_modules):
|
||||||
|
logger.warning(f"Module {module} not found in available modules. Are you sure it's installed?")
|
||||||
|
|
||||||
return all_modules
|
return all_modules
|
|
@ -11,9 +11,6 @@ from dataclasses import dataclass, field
|
||||||
from dataclasses_json import dataclass_json, config
|
from dataclasses_json import dataclass_json, config
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
|
||||||
import ffmpeg
|
|
||||||
from ffmpeg._run import Error
|
|
||||||
|
|
||||||
from .context import ArchivingContext
|
from .context import ArchivingContext
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
@ -106,6 +103,12 @@ class Media:
|
||||||
return self.mimetype.startswith("image")
|
return self.mimetype.startswith("image")
|
||||||
|
|
||||||
def is_valid_video(self) -> bool:
|
def is_valid_video(self) -> bool:
|
||||||
|
# Note: this is intentional, to only import ffmpeg here - when the method is called
|
||||||
|
# this speeds up loading the module. We check that 'ffmpeg' is available on startup
|
||||||
|
# when we load each manifest file
|
||||||
|
import ffmpeg
|
||||||
|
from ffmpeg._run import Error
|
||||||
|
|
||||||
# checks for video streams with ffmpeg, or min file size for a video
|
# checks for video streams with ffmpeg, or min file size for a video
|
||||||
# self.is_video() should be used together with this method
|
# self.is_video() should be used together with this method
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -9,7 +9,6 @@ from typing import Generator, Union, List
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from ipaddress import ip_address
|
from ipaddress import ip_address
|
||||||
import argparse
|
import argparse
|
||||||
import configparser
|
|
||||||
import os
|
import os
|
||||||
from os.path import join, dirname
|
from os.path import join, dirname
|
||||||
|
|
||||||
|
@ -17,16 +16,10 @@ from rich_argparse import RichHelpFormatter
|
||||||
|
|
||||||
from .context import ArchivingContext
|
from .context import ArchivingContext
|
||||||
|
|
||||||
from ..archivers import Archiver
|
|
||||||
from ..feeders import Feeder
|
|
||||||
from ..formatters import Formatter
|
|
||||||
from ..storages import Storage
|
|
||||||
from ..enrichers import Enricher
|
|
||||||
from ..databases import Database
|
|
||||||
from .metadata import Metadata
|
from .metadata import Metadata
|
||||||
from ..version import __version__
|
from ..version import __version__
|
||||||
from .config import read_config, store_config
|
from .config import read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG
|
||||||
from .loader import available_modules, Module, MODULE_TYPES
|
from .loader import available_modules, Module, MODULE_TYPES, load_modules
|
||||||
|
|
||||||
import tempfile, traceback
|
import tempfile, traceback
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
@ -69,72 +62,76 @@ class ArchivingOrchestrator:
|
||||||
parser.add_argument('-s', '--store', action='store_true', dest='store', help='Store the created config in the config file')
|
parser.add_argument('-s', '--store', action='store_true', dest='store', help='Store the created config in the config file')
|
||||||
self.basic_parser = parser
|
self.basic_parser = parser
|
||||||
|
|
||||||
def setup_complete_parser(self, basic_config: dict, ini_config: dict, unused_args: list[str]) -> None:
|
def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
parents = [self.basic_parser],
|
parents = [self.basic_parser],
|
||||||
add_help=False,
|
add_help=False,
|
||||||
)
|
)
|
||||||
|
self.add_steps_args(parser)
|
||||||
|
|
||||||
# check what mode we're in
|
# check what mode we're in
|
||||||
# if we have a config file, use that to decide which modules to load
|
# if we have a config file, use that to decide which modules to load
|
||||||
# if simple, we'll load just the modules that has requires_setup = False
|
# if simple, we'll load just the modules that has requires_setup = False
|
||||||
# if full, we'll load all modules
|
# if full, we'll load all modules
|
||||||
if ini_config:
|
if yaml_config != EMPTY_CONFIG:
|
||||||
# only load the modules enabled in config
|
# only load the modules enabled in config
|
||||||
|
# TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
|
||||||
enabled_modules = []
|
enabled_modules = []
|
||||||
for module_type in MODULE_TYPES:
|
for module_type in MODULE_TYPES:
|
||||||
try:
|
enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
|
||||||
enabled_modules.extend(ini_config.get("STEPS", module_type))
|
|
||||||
except configparser.NoOptionError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter'
|
# add in any extra modules that have been passed on the command line for 'feeders', 'enrichers', 'archivers', 'databases', 'storages', 'formatter'
|
||||||
for module_type in MODULE_TYPES:
|
for module_type in MODULE_TYPES:
|
||||||
if modules := getattr(basic_config, f"{module_type}s", []):
|
if modules := getattr(basic_config, f"{module_type}s", []):
|
||||||
enabled_modules.extend(modules)
|
enabled_modules.extend(modules)
|
||||||
|
|
||||||
self.add_module_args(available_modules(enabled_modules, with_manifest=True), parser)
|
self.add_module_args(available_modules(with_manifest=True, limit_to_modules=enabled_modules), parser)
|
||||||
elif basic_config.mode == 'simple':
|
elif basic_config.mode == 'simple':
|
||||||
simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
|
simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
|
||||||
self.add_module_args(simple_modules, parser)
|
self.add_module_args(simple_modules, parser)
|
||||||
# add them to the config
|
# add them to the config
|
||||||
for module in simple_modules:
|
for module in simple_modules:
|
||||||
for module_type in module.type:
|
for module_type in module.type:
|
||||||
existing_modules = config['STEPS'] = module.name
|
yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
|
||||||
ini_config.setdefault(f"{module_type}s", []).append(module.name)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# load all modules, they're not using the 'simple' mode
|
# load all modules, they're not using the 'simple' mode
|
||||||
self.add_module_args(available_modules(with_manifest=True), parser)
|
self.add_module_args(available_modules(with_manifest=True), parser)
|
||||||
|
|
||||||
parser.set_defaults(**ini_config)
|
|
||||||
|
parser.set_defaults(**to_dot_notation(yaml_config))
|
||||||
|
|
||||||
# reload the parser with the new arguments, now that we have them
|
# reload the parser with the new arguments, now that we have them
|
||||||
self.config, unknown = parser.parse_known_args(unused_args)
|
parsed, unknown = parser.parse_known_args(unused_args)
|
||||||
if unknown:
|
if unknown:
|
||||||
logger.warning(f"Ignoring unknown/unused arguments: {unknown}")
|
logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?")
|
||||||
|
|
||||||
if self.config and basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)):
|
# merge the new config with the old one
|
||||||
|
yaml_config = merge_dicts(vars(parsed), yaml_config)
|
||||||
|
|
||||||
|
if basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)):
|
||||||
logger.info(f"Storing configuration file to {basic_config.config_file}")
|
logger.info(f"Storing configuration file to {basic_config.config_file}")
|
||||||
store_config(ini_config, basic_config.config_file)
|
store_yaml(yaml_config, basic_config.config_file)
|
||||||
breakpoint()
|
|
||||||
logger.info(f"FEEDER: {self.config.feeders}")
|
self.config = yaml_config
|
||||||
logger.info(f"ENRICHERS: {self.config.enrichers}")
|
|
||||||
logger.info(f"ARCHIVERS: {self.config.archivers}")
|
logger.info("FEEDERS: " + ", ".join(self.config['steps']['feeders']))
|
||||||
logger.info(f"DATABASES: {self.config.databases}")
|
logger.info("EXTRACTORS: " + ", ".join(self.config['steps']['extractors']))
|
||||||
logger.info(f"STORAGES: {self.config.storages}")
|
logger.info("ENRICHERS: " + ", ".join(self.config['steps']['enrichers']))
|
||||||
logger.info(f"FORMATTER: {self.formatter.name}")
|
logger.info("DATABASES: " + ", ".join(self.config['steps']['databases']))
|
||||||
|
logger.info("STORAGES: " + ", ".join(self.config['steps']['storages']))
|
||||||
|
logger.info("FORMATTERS: " + ", ".join(self.config['steps']['formatters']))
|
||||||
|
return self.config
|
||||||
|
|
||||||
def add_steps_args(self, parser: argparse.ArgumentParser = None):
|
def add_steps_args(self, parser: argparse.ArgumentParser = None):
|
||||||
if not parser:
|
if not parser:
|
||||||
parser = self.parser
|
parser = self.parser
|
||||||
|
|
||||||
parser.add_argument('--feeders', action='store', dest='feeders', nargs='+', required=True, help='the feeders to use')
|
parser.add_argument('--feeders', action='store', dest='steps.feeders', nargs='+', required=True, help='the feeders to use')
|
||||||
parser.add_argument('--enrichers', action='store', dest='enrichers', nargs='+', required=True, help='the enrichers to use')
|
parser.add_argument('--enrichers', action='store', dest='steps.enrichers', nargs='+', required=True, help='the enrichers to use')
|
||||||
parser.add_argument('--archivers', action='store', dest='archivers', nargs='+', required=True, help='the archivers to use')
|
parser.add_argument('--extractors', action='store', dest='steps.extractors', nargs='+', required=True, help='the extractors to use')
|
||||||
parser.add_argument('--databases', action='store', dest='databases', nargs='+', required=True, help='the databases to use')
|
parser.add_argument('--databases', action='store', dest='steps.databases', nargs='+', required=True, help='the databases to use')
|
||||||
parser.add_argument('--storages', action='store', dest='storages', nargs='+', required=True, help='the storages to use')
|
parser.add_argument('--storages', action='store', dest='steps.storages', nargs='+', required=True, help='the storages to use')
|
||||||
parser.add_argument('--formatter', action='store', dest='formatter', nargs='+', required=True, help='the formatter to use')
|
parser.add_argument('--formatters', action='store', dest='steps.formatters', nargs='+', required=True, help='the formatter to use')
|
||||||
|
|
||||||
def add_module_args(self, modules: list[Module] = None, parser: argparse.ArgumentParser = None):
|
def add_module_args(self, modules: list[Module] = None, parser: argparse.ArgumentParser = None):
|
||||||
|
|
||||||
|
@ -166,6 +163,12 @@ class ArchivingOrchestrator:
|
||||||
self.basic_parser.print_help()
|
self.basic_parser.print_help()
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
def install_modules(self):
|
||||||
|
modules = set()
|
||||||
|
[modules.update(*m) for m in self.config['steps'].values()]
|
||||||
|
|
||||||
|
load_modules(modules)
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
self.setup_basic_parser()
|
self.setup_basic_parser()
|
||||||
|
|
||||||
|
@ -179,19 +182,18 @@ class ArchivingOrchestrator:
|
||||||
self.show_help()
|
self.show_help()
|
||||||
|
|
||||||
# load the config file
|
# load the config file
|
||||||
ini_config = {}
|
yaml_config = {}
|
||||||
|
|
||||||
try:
|
if not os.path.exists(basic_config.config_file) and basic_config.config_file != DEFAULT_CONFIG_FILE:
|
||||||
ini_config = read_config(basic_config.config_file)
|
|
||||||
except FileNotFoundError:
|
|
||||||
if basic_config.config_file != DEFAULT_CONFIG_FILE:
|
|
||||||
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
|
logger.error(f"The configuration file {basic_config.config_file} was not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
self.setup_complete_parser(basic_config, ini_config, unused_args)
|
yaml_config = read_yaml(basic_config.config_file)
|
||||||
|
|
||||||
config.parse()
|
breakpoint()
|
||||||
|
self.setup_complete_parser(basic_config, yaml_config, unused_args)
|
||||||
|
|
||||||
|
self.install_modules()
|
||||||
|
|
||||||
for item in self.feed():
|
for item in self.feed():
|
||||||
pass
|
pass
|
||||||
|
@ -201,7 +203,8 @@ class ArchivingOrchestrator:
|
||||||
for a in self.all_archivers_for_setup(): a.cleanup()
|
for a in self.all_archivers_for_setup(): a.cleanup()
|
||||||
|
|
||||||
def feed(self) -> Generator[Metadata]:
|
def feed(self) -> Generator[Metadata]:
|
||||||
for item in self.feeder:
|
for feeder in self.config['steps']['feeders']:
|
||||||
|
for item in feeder:
|
||||||
yield self.feed_item(item)
|
yield self.feed_item(item)
|
||||||
self.cleanup()
|
self.cleanup()
|
||||||
|
|
||||||
|
|
|
@ -10,15 +10,3 @@ Enrichers are optional but highly useful for making the archived data more power
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from .enricher import Enricher
|
|
||||||
from .screenshot_enricher import ScreenshotEnricher
|
|
||||||
from .wayback_enricher import WaybackArchiverEnricher
|
|
||||||
from .hash_enricher import HashEnricher
|
|
||||||
from .thumbnail_enricher import ThumbnailEnricher
|
|
||||||
from .wacz_enricher import WaczArchiverEnricher
|
|
||||||
from .whisper_enricher import WhisperEnricher
|
|
||||||
from .pdq_hash_enricher import PdqHashEnricher
|
|
||||||
from .metadata_enricher import MetadataEnricher
|
|
||||||
from .meta_enricher import MetaEnricher
|
|
||||||
from .ssl_enricher import SSLEnricher
|
|
||||||
from .timestamping_enricher import TimestampingEnricher
|
|
|
@ -1,7 +1,3 @@
|
||||||
""" Feeders handle the input of media into the Auto Archiver.
|
""" Feeders handle the input of media into the Auto Archiver.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from.feeder import Feeder
|
|
||||||
from .gsheet_feeder import GsheetsFeeder
|
|
||||||
from .cli_feeder import CLIFeeder
|
|
||||||
from .atlos_feeder import AtlosFeeder
|
|
|
@ -1,4 +1 @@
|
||||||
""" Formatters for the output of the content. """
|
""" Formatters for the output of the content. """
|
||||||
from .formatter import Formatter
|
|
||||||
from .html_formatter import HtmlFormatter
|
|
||||||
from .mute_formatter import MuteFormatter
|
|
|
@ -1,8 +1,3 @@
|
||||||
""" This module contains the storage classes for the auto-archiver.
|
""" This module contains the storage classes for the auto-archiver.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from .storage import Storage
|
|
||||||
from .s3 import S3Storage
|
|
||||||
from .local import LocalStorage
|
|
||||||
from .gd import GDriveStorage
|
|
||||||
from .atlos import AtlosStorage
|
|
Ładowanie…
Reference in New Issue