kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix loading already loaded modules - don't load them twice
rodzic
550097ab7b
commit
65ef46d01e
|
@ -10,6 +10,8 @@ from loguru import logger
|
|||
import sys
|
||||
import shutil
|
||||
|
||||
_LOADED_MODULES = {}
|
||||
|
||||
MODULE_TYPES = [
|
||||
'feeder',
|
||||
'enricher',
|
||||
|
@ -68,6 +70,9 @@ class Module:
|
|||
|
||||
def load_module(module: str) -> object: # TODO: change return type to Step
|
||||
|
||||
if module in _LOADED_MODULES:
|
||||
return _LOADED_MODULES[module]
|
||||
|
||||
# load a module by name
|
||||
module = get_module(module)
|
||||
if not module:
|
||||
|
@ -83,11 +88,11 @@ def load_module(module: str) -> object: # TODO: change return type to Step
|
|||
check_deps(module.dependencies.get('bin', []), lambda dep: shutil.which(dep))
|
||||
|
||||
qualname = f'auto_archiver.modules.{module.name}'
|
||||
if qualname in sys.modules:
|
||||
return
|
||||
|
||||
logger.info(f"Loading module '{module.display_name}'...")
|
||||
loaded_module = __import__(qualname)
|
||||
return getattr(sys.modules[qualname], module.entry_point)()
|
||||
_LOADED_MODULES[module.name] = getattr(sys.modules[qualname], module.entry_point)()
|
||||
return _LOADED_MODULES[module.name]
|
||||
|
||||
|
||||
# finally, load the module
|
||||
|
@ -144,6 +149,6 @@ def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= []
|
|||
if not suppress_warnings:
|
||||
for module in limit_to_modules:
|
||||
if not any(module == m.name for m in all_modules):
|
||||
logger.warning(f"Module {module} not found in available modules. Are you sure it's installed?")
|
||||
logger.warning(f"Module '{module}' not found in available modules. Are you sure it's installed?")
|
||||
|
||||
return all_modules
|
|
@ -85,7 +85,7 @@ class ArchivingOrchestrator:
|
|||
if modules := getattr(basic_config, f"{module_type}s", []):
|
||||
enabled_modules.extend(modules)
|
||||
|
||||
self.add_module_args(available_modules(with_manifest=True, limit_to_modules=enabled_modules), parser)
|
||||
self.add_module_args(available_modules(with_manifest=True, limit_to_modules=set(enabled_modules)), parser)
|
||||
elif basic_config.mode == 'simple':
|
||||
simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
|
||||
self.add_module_args(simple_modules, parser)
|
||||
|
@ -98,6 +98,7 @@ class ArchivingOrchestrator:
|
|||
self.add_module_args(available_modules(with_manifest=True), parser)
|
||||
|
||||
|
||||
breakpoint()
|
||||
parser.set_defaults(**to_dot_notation(yaml_config))
|
||||
|
||||
# reload the parser with the new arguments, now that we have them
|
||||
|
@ -106,13 +107,13 @@ class ArchivingOrchestrator:
|
|||
logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?")
|
||||
|
||||
# merge the new config with the old one
|
||||
yaml_config = merge_dicts(vars(parsed), yaml_config)
|
||||
merged_yaml_config = merge_dicts(vars(parsed), yaml_config)
|
||||
|
||||
if basic_config.store or not os.path.isfile(join(dirname(__file__), basic_config.config_file)):
|
||||
if (merged_yaml_config != yaml_config and basic_config.store) or not os.path.isfile(basic_config.config_file):
|
||||
logger.info(f"Storing configuration file to {basic_config.config_file}")
|
||||
store_yaml(yaml_config, basic_config.config_file)
|
||||
|
||||
self.config = yaml_config
|
||||
self.config = merged_yaml_config
|
||||
|
||||
return self.config
|
||||
|
||||
|
@ -120,12 +121,12 @@ class ArchivingOrchestrator:
|
|||
if not parser:
|
||||
parser = self.parser
|
||||
|
||||
parser.add_argument('--feeders', action='store', dest='steps.feeders', nargs='+', required=True, help='the feeders to use')
|
||||
parser.add_argument('--enrichers', action='store', dest='steps.enrichers', nargs='+', required=True, help='the enrichers to use')
|
||||
parser.add_argument('--extractors', action='store', dest='steps.extractors', nargs='+', required=True, help='the extractors to use')
|
||||
parser.add_argument('--databases', action='store', dest='steps.databases', nargs='+', required=True, help='the databases to use')
|
||||
parser.add_argument('--storages', action='store', dest='steps.storages', nargs='+', required=True, help='the storages to use')
|
||||
parser.add_argument('--formatters', action='store', dest='steps.formatters', nargs='+', required=True, help='the formatter to use')
|
||||
parser.add_argument('--feeders', action='store', dest='steps.feeders', nargs='+', help='the feeders to use')
|
||||
parser.add_argument('--enrichers', action='store', dest='steps.enrichers', nargs='+', help='the enrichers to use')
|
||||
parser.add_argument('--extractors', action='store', dest='steps.extractors', nargs='+', help='the extractors to use')
|
||||
parser.add_argument('--databases', action='store', dest='steps.databases', nargs='+', help='the databases to use')
|
||||
parser.add_argument('--storages', action='store', dest='steps.storages', nargs='+', help='the storages to use')
|
||||
parser.add_argument('--formatters', action='store', dest='steps.formatters', nargs='+', help='the formatter to use')
|
||||
|
||||
def add_module_args(self, modules: list[Module] = None, parser: argparse.ArgumentParser = None):
|
||||
|
||||
|
@ -163,6 +164,8 @@ class ArchivingOrchestrator:
|
|||
"""
|
||||
|
||||
for module_type in MODULE_TYPES:
|
||||
if module_type == 'enricher':
|
||||
breakpoint()
|
||||
step_items = []
|
||||
modules_to_load = self.config['steps'][f"{module_type}s"]
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
'name': 'Generic Extractor',
|
||||
'version': '0.1.0',
|
||||
'author': 'Bellingcat',
|
||||
'type': ['extractor'],
|
||||
'type': ['extractor', 'feeder', 'enricher'],
|
||||
'entry_point': 'GenericExtractor', # this class should be present in the __init__.py
|
||||
'requires_setup': False,
|
||||
'dependencies': {
|
||||
|
|
Ładowanie…
Reference in New Issue