kopia lustrzana https://github.com/bellingcat/auto-archiver
Merge pull request #210 from bellingcat/logger_fix
Fix issue #200 + Refactor _LAZY_LOADED_MODULES (pull/216/head, v0.13.4)
commit
5211c5de18
|
@ -1,6 +1,6 @@
|
||||||
# iterate through all the modules in auto_archiver.modules and turn the __manifest__.py file into a markdown table
|
# iterate through all the modules in auto_archiver.modules and turn the __manifest__.py file into a markdown table
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from auto_archiver.core.module import available_modules
|
from auto_archiver.core.module import ModuleFactory
|
||||||
from auto_archiver.core.base_module import BaseModule
|
from auto_archiver.core.base_module import BaseModule
|
||||||
from ruamel.yaml import YAML
|
from ruamel.yaml import YAML
|
||||||
import io
|
import io
|
||||||
|
@ -41,7 +41,7 @@ def generate_module_docs():
|
||||||
configs_cheatsheet = "\n## Configuration Options\n"
|
configs_cheatsheet = "\n## Configuration Options\n"
|
||||||
configs_cheatsheet += header_row
|
configs_cheatsheet += header_row
|
||||||
|
|
||||||
for module in sorted(available_modules(with_manifest=True), key=lambda x: (x.requires_setup, x.name)):
|
for module in sorted(ModuleFactory().available_modules(), key=lambda x: (x.requires_setup, x.name)):
|
||||||
# generate the markdown file from the __manifest__.py file.
|
# generate the markdown file from the __manifest__.py file.
|
||||||
|
|
||||||
manifest = module.manifest
|
manifest = module.manifest
|
||||||
|
|
|
@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "auto-archiver"
|
name = "auto-archiver"
|
||||||
version = "0.13.3"
|
version = "0.13.4"
|
||||||
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
|
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
|
||||||
|
|
||||||
requires-python = ">=3.10,<3.13"
|
requires-python = ">=3.10,<3.13"
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
"""
|
"""
|
||||||
from .metadata import Metadata
|
from .metadata import Metadata
|
||||||
from .media import Media
|
from .media import Media
|
||||||
from .module import BaseModule
|
from .base_module import BaseModule
|
||||||
|
|
||||||
# cannot import ArchivingOrchestrator/Config to avoid circular dep
|
# cannot import ArchivingOrchestrator/Config to avoid circular dep
|
||||||
# from .orchestrator import ArchivingOrchestrator
|
# from .orchestrator import ArchivingOrchestrator
|
||||||
|
|
|
@ -1,13 +1,18 @@
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
from __future__ import annotations
|
||||||
from typing import Mapping, Any
|
|
||||||
|
from typing import Mapping, Any, Type, TYPE_CHECKING
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
from copy import deepcopy, copy
|
from copy import deepcopy, copy
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
from auto_archiver.utils import url as UrlUtil
|
from auto_archiver.utils import url as UrlUtil
|
||||||
|
from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from .module import ModuleFactory
|
||||||
|
|
||||||
class BaseModule(ABC):
|
class BaseModule(ABC):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -17,41 +22,24 @@ class BaseModule(ABC):
|
||||||
however modules can have a .setup() method to run any setup code
|
however modules can have a .setup() method to run any setup code
|
||||||
(e.g. logging in to a site, spinning up a browser etc.)
|
(e.g. logging in to a site, spinning up a browser etc.)
|
||||||
|
|
||||||
See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
|
See consts.MODULE_TYPES for the types of modules you can create, noting that
|
||||||
a subclass can be of multiple types. For example, a module that extracts data from
|
a subclass can be of multiple types. For example, a module that extracts data from
|
||||||
a website and stores it in a database would be both an 'extractor' and a 'database' module.
|
a website and stores it in a database would be both an 'extractor' and a 'database' module.
|
||||||
|
|
||||||
Each module is a python package, and should have a __manifest__.py file in the
|
Each module is a python package, and should have a __manifest__.py file in the
|
||||||
same directory as the module file. The __manifest__.py specifies the module information
|
same directory as the module file. The __manifest__.py specifies the module information
|
||||||
like name, author, version, dependencies etc. See BaseModule._DEFAULT_MANIFEST for the
|
like name, author, version, dependencies etc. See DEFAULT_MANIFEST for the
|
||||||
default manifest structure.
|
default manifest structure.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
MODULE_TYPES = [
|
MODULE_TYPES = CONF_MODULE_TYPES
|
||||||
'feeder',
|
|
||||||
'extractor',
|
|
||||||
'enricher',
|
|
||||||
'database',
|
|
||||||
'storage',
|
|
||||||
'formatter'
|
|
||||||
]
|
|
||||||
|
|
||||||
_DEFAULT_MANIFEST = {
|
|
||||||
'name': '', # the display name of the module
|
|
||||||
'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
|
|
||||||
'type': [], # the type of the module, can be one or more of BaseModule.MODULE_TYPES
|
|
||||||
'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional software
|
|
||||||
'description': '', # a description of the module
|
|
||||||
'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
|
|
||||||
'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
|
|
||||||
'version': '1.0', # the version of the module
|
|
||||||
'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
|
|
||||||
}
|
|
||||||
|
|
||||||
|
# NOTE: these are declared as class variables, but they are overridden by the instance variables in the __init__ method
|
||||||
config: Mapping[str, Any]
|
config: Mapping[str, Any]
|
||||||
authentication: Mapping[str, Mapping[str, str]]
|
authentication: Mapping[str, Mapping[str, str]]
|
||||||
name: str
|
name: str
|
||||||
|
module_factory: ModuleFactory
|
||||||
|
|
||||||
# this is set by the orchestrator prior to archiving
|
# this is set by the orchestrator prior to archiving
|
||||||
tmp_dir: TemporaryDirectory = None
|
tmp_dir: TemporaryDirectory = None
|
||||||
|
|
|
@ -11,7 +11,7 @@ from ruamel.yaml import YAML, CommentedMap, add_representer
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from .module import BaseModule
|
from auto_archiver.core.consts import MODULE_TYPES
|
||||||
|
|
||||||
from typing import Any, List, Type, Tuple
|
from typing import Any, List, Type, Tuple
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ EMPTY_CONFIG = _yaml.load("""
|
||||||
# Auto Archiver Configuration
|
# Auto Archiver Configuration
|
||||||
# Steps are the modules that will be run in the order they are defined
|
# Steps are the modules that will be run in the order they are defined
|
||||||
|
|
||||||
steps:""" + "".join([f"\n {module}s: []" for module in BaseModule.MODULE_TYPES]) + \
|
steps:""" + "".join([f"\n {module}s: []" for module in MODULE_TYPES]) + \
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Global configuration
|
# Global configuration
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
|
||||||
|
MODULE_TYPES = [
|
||||||
|
'feeder',
|
||||||
|
'extractor',
|
||||||
|
'enricher',
|
||||||
|
'database',
|
||||||
|
'storage',
|
||||||
|
'formatter'
|
||||||
|
]
|
||||||
|
|
||||||
|
MANIFEST_FILE = "__manifest__.py"
|
||||||
|
|
||||||
|
DEFAULT_MANIFEST = {
|
||||||
|
'name': '', # the display name of the module
|
||||||
|
'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
|
||||||
|
'type': [], # the type of the module, can be one or more of MODULE_TYPES
|
||||||
|
'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional software
|
||||||
|
'description': '', # a description of the module
|
||||||
|
'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
|
||||||
|
'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
|
||||||
|
'version': '1.0', # the version of the module
|
||||||
|
'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
|
||||||
|
}
|
|
@ -6,7 +6,7 @@ by handling user configuration, validating the steps properties, and implementin
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List
|
from typing import List, TYPE_CHECKING
|
||||||
import shutil
|
import shutil
|
||||||
import ast
|
import ast
|
||||||
import copy
|
import copy
|
||||||
|
@ -16,99 +16,113 @@ import os
|
||||||
from os.path import join
|
from os.path import join
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
import auto_archiver
|
import auto_archiver
|
||||||
from .base_module import BaseModule
|
from auto_archiver.core.consts import DEFAULT_MANIFEST, MANIFEST_FILE
|
||||||
|
|
||||||
_LAZY_LOADED_MODULES = {}
|
if TYPE_CHECKING:
|
||||||
|
from .base_module import BaseModule
|
||||||
MANIFEST_FILE = "__manifest__.py"
|
|
||||||
|
|
||||||
|
|
||||||
def setup_paths(paths: list[str]) -> None:
|
HAS_SETUP_PATHS = False
|
||||||
"""
|
|
||||||
Sets up the paths for the modules to be loaded from
|
|
||||||
|
|
||||||
This is necessary for the modules to be imported correctly
|
|
||||||
|
|
||||||
"""
|
|
||||||
for path in paths:
|
|
||||||
# check path exists, if it doesn't, log a warning
|
|
||||||
if not os.path.exists(path):
|
|
||||||
logger.warning(f"Path '{path}' does not exist. Skipping...")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# see odoo/modules/module.py -> initialize_sys_path
|
class ModuleFactory:
|
||||||
if path not in auto_archiver.modules.__path__:
|
|
||||||
auto_archiver.modules.__path__.append(path)
|
|
||||||
|
|
||||||
# sort by path length in descending order, so that the longest path is first in the list
|
def __init__(self):
|
||||||
auto_archiver.modules.__path__ = sorted(auto_archiver.modules.__path__, key=len, reverse=True)
|
self._lazy_modules = {}
|
||||||
|
|
||||||
def get_module(module_name: str, config: dict) -> BaseModule:
|
def setup_paths(self, paths: list[str]) -> None:
|
||||||
"""
|
"""
|
||||||
Gets and sets up a module using the provided config
|
Sets up the paths for the modules to be loaded from
|
||||||
|
|
||||||
This will actually load and instantiate the module, and load all its dependencies (i.e. not lazy)
|
This is necessary for the modules to be imported correctly
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return get_module_lazy(module_name).load(config)
|
global HAS_SETUP_PATHS
|
||||||
|
|
||||||
def get_module_lazy(module_name: str, suppress_warnings: bool = False) -> LazyBaseModule:
|
for path in paths:
|
||||||
"""
|
# check path exists, if it doesn't, log a warning
|
||||||
Lazily loads a module, returning a LazyBaseModule
|
if not os.path.exists(path):
|
||||||
|
logger.warning(f"Path '{path}' does not exist. Skipping...")
|
||||||
This has all the information about the module, but does not load the module itself or its dependencies
|
|
||||||
|
|
||||||
To load an actual module, call .load(config) on a lazy module
|
|
||||||
|
|
||||||
"""
|
|
||||||
if module_name in _LAZY_LOADED_MODULES:
|
|
||||||
return _LAZY_LOADED_MODULES[module_name]
|
|
||||||
|
|
||||||
available = available_modules(limit_to_modules=[module_name], suppress_warnings=suppress_warnings)
|
|
||||||
if not available:
|
|
||||||
raise IndexError(f"Module '{module_name}' not found. Are you sure it's installed/exists?")
|
|
||||||
return available[0]
|
|
||||||
|
|
||||||
def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= [], suppress_warnings: bool = False) -> List[LazyBaseModule]:
|
|
||||||
|
|
||||||
# search through all valid 'modules' paths. Default is 'modules' in the current directory
|
|
||||||
|
|
||||||
# see odoo/modules/module.py -> get_modules
|
|
||||||
def is_really_module(module_path):
|
|
||||||
if os.path.isfile(join(module_path, MANIFEST_FILE)):
|
|
||||||
return True
|
|
||||||
|
|
||||||
all_modules = []
|
|
||||||
|
|
||||||
for module_folder in auto_archiver.modules.__path__:
|
|
||||||
# walk through each module in module_folder and check if it has a valid manifest
|
|
||||||
try:
|
|
||||||
possible_modules = os.listdir(module_folder)
|
|
||||||
except FileNotFoundError:
|
|
||||||
logger.warning(f"Module folder {module_folder} does not exist")
|
|
||||||
continue
|
|
||||||
|
|
||||||
for possible_module in possible_modules:
|
|
||||||
if limit_to_modules and possible_module not in limit_to_modules:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
possible_module_path = join(module_folder, possible_module)
|
# see odoo/modules/module.py -> initialize_sys_path
|
||||||
if not is_really_module(possible_module_path):
|
if path not in auto_archiver.modules.__path__:
|
||||||
|
if HAS_SETUP_PATHS == True:
|
||||||
|
logger.warning(f"You are attempting to re-initialise the module paths with: '{path}' for a 2nd time. \
|
||||||
|
This could lead to unexpected behaviour. It is recommended to only use a single modules path. \
|
||||||
|
If you wish to load modules from different paths then load a 2nd python interpreter (e.g. using multiprocessing).")
|
||||||
|
auto_archiver.modules.__path__.append(path)
|
||||||
|
|
||||||
|
# sort by path length in descending order, so that the longest path is first in the list
|
||||||
|
auto_archiver.modules.__path__ = sorted(auto_archiver.modules.__path__, key=len, reverse=True)
|
||||||
|
|
||||||
|
HAS_SETUP_PATHS = True
|
||||||
|
|
||||||
|
def get_module(self, module_name: str, config: dict) -> BaseModule:
|
||||||
|
"""
|
||||||
|
Gets and sets up a module using the provided config
|
||||||
|
|
||||||
|
This will actually load and instantiate the module, and load all its dependencies (i.e. not lazy)
|
||||||
|
|
||||||
|
"""
|
||||||
|
return self.get_module_lazy(module_name).load(config)
|
||||||
|
|
||||||
|
def get_module_lazy(self, module_name: str, suppress_warnings: bool = False) -> LazyBaseModule:
|
||||||
|
"""
|
||||||
|
Lazily loads a module, returning a LazyBaseModule
|
||||||
|
|
||||||
|
This has all the information about the module, but does not load the module itself or its dependencies
|
||||||
|
|
||||||
|
To load an actual module, call .load(config) on a lazy module
|
||||||
|
|
||||||
|
"""
|
||||||
|
if module_name in self._lazy_modules:
|
||||||
|
return self._lazy_modules[module_name]
|
||||||
|
|
||||||
|
available = self.available_modules(limit_to_modules=[module_name], suppress_warnings=suppress_warnings)
|
||||||
|
if not available:
|
||||||
|
raise IndexError(f"Module '{module_name}' not found. Are you sure it's installed/exists?")
|
||||||
|
return available[0]
|
||||||
|
|
||||||
|
def available_modules(self, limit_to_modules: List[str]= [], suppress_warnings: bool = False) -> List[LazyBaseModule]:
|
||||||
|
|
||||||
|
# search through all valid 'modules' paths. Default is 'modules' in the current directory
|
||||||
|
|
||||||
|
# see odoo/modules/module.py -> get_modules
|
||||||
|
def is_really_module(module_path):
|
||||||
|
if os.path.isfile(join(module_path, MANIFEST_FILE)):
|
||||||
|
return True
|
||||||
|
|
||||||
|
all_modules = []
|
||||||
|
|
||||||
|
for module_folder in auto_archiver.modules.__path__:
|
||||||
|
# walk through each module in module_folder and check if it has a valid manifest
|
||||||
|
try:
|
||||||
|
possible_modules = os.listdir(module_folder)
|
||||||
|
except FileNotFoundError:
|
||||||
|
logger.warning(f"Module folder {module_folder} does not exist")
|
||||||
continue
|
continue
|
||||||
if _LAZY_LOADED_MODULES.get(possible_module):
|
|
||||||
continue
|
|
||||||
lazy_module = LazyBaseModule(possible_module, possible_module_path)
|
|
||||||
|
|
||||||
_LAZY_LOADED_MODULES[possible_module] = lazy_module
|
for possible_module in possible_modules:
|
||||||
|
if limit_to_modules and possible_module not in limit_to_modules:
|
||||||
|
continue
|
||||||
|
|
||||||
all_modules.append(lazy_module)
|
possible_module_path = join(module_folder, possible_module)
|
||||||
|
if not is_really_module(possible_module_path):
|
||||||
if not suppress_warnings:
|
continue
|
||||||
for module in limit_to_modules:
|
if self._lazy_modules.get(possible_module):
|
||||||
if not any(module == m.name for m in all_modules):
|
continue
|
||||||
logger.warning(f"Module '{module}' not found. Are you sure it's installed?")
|
lazy_module = LazyBaseModule(possible_module, possible_module_path, factory=self)
|
||||||
|
|
||||||
return all_modules
|
self._lazy_modules[possible_module] = lazy_module
|
||||||
|
|
||||||
|
all_modules.append(lazy_module)
|
||||||
|
|
||||||
|
if not suppress_warnings:
|
||||||
|
for module in limit_to_modules:
|
||||||
|
if not any(module == m.name for m in all_modules):
|
||||||
|
logger.warning(f"Module '{module}' not found. Are you sure it's installed?")
|
||||||
|
|
||||||
|
return all_modules
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class LazyBaseModule:
|
class LazyBaseModule:
|
||||||
|
@ -123,14 +137,16 @@ class LazyBaseModule:
|
||||||
type: list
|
type: list
|
||||||
description: str
|
description: str
|
||||||
path: str
|
path: str
|
||||||
|
module_factory: ModuleFactory
|
||||||
|
|
||||||
_manifest: dict = None
|
_manifest: dict = None
|
||||||
_instance: BaseModule = None
|
_instance: BaseModule = None
|
||||||
_entry_point: str = None
|
_entry_point: str = None
|
||||||
|
|
||||||
def __init__(self, module_name, path):
|
def __init__(self, module_name, path, factory: ModuleFactory):
|
||||||
self.name = module_name
|
self.name = module_name
|
||||||
self.path = path
|
self.path = path
|
||||||
|
self.module_factory = factory
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def entry_point(self):
|
def entry_point(self):
|
||||||
|
@ -161,7 +177,7 @@ class LazyBaseModule:
|
||||||
return self._manifest
|
return self._manifest
|
||||||
# print(f"Loading manifest for module {module_path}")
|
# print(f"Loading manifest for module {module_path}")
|
||||||
# load the manifest file
|
# load the manifest file
|
||||||
manifest = copy.deepcopy(BaseModule._DEFAULT_MANIFEST)
|
manifest = copy.deepcopy(DEFAULT_MANIFEST)
|
||||||
|
|
||||||
with open(join(self.path, MANIFEST_FILE)) as f:
|
with open(join(self.path, MANIFEST_FILE)) as f:
|
||||||
try:
|
try:
|
||||||
|
@ -189,13 +205,14 @@ class LazyBaseModule:
|
||||||
# clear out any empty strings that a user may have erroneously added
|
# clear out any empty strings that a user may have erroneously added
|
||||||
continue
|
continue
|
||||||
if not check(dep):
|
if not check(dep):
|
||||||
logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
|
logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. \
|
||||||
|
Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
def check_python_dep(dep):
|
def check_python_dep(dep):
|
||||||
# first check if it's a module:
|
# first check if it's a module:
|
||||||
try:
|
try:
|
||||||
m = get_module_lazy(dep, suppress_warnings=True)
|
m = self.module_factory.get_module_lazy(dep, suppress_warnings=True)
|
||||||
try:
|
try:
|
||||||
# we must now load this module and set it up with the config
|
# we must now load this module and set it up with the config
|
||||||
m.load(config)
|
m.load(config)
|
||||||
|
@ -230,19 +247,21 @@ class LazyBaseModule:
|
||||||
__import__(f'{qualname}.{file_name}', fromlist=[self.entry_point])
|
__import__(f'{qualname}.{file_name}', fromlist=[self.entry_point])
|
||||||
# finally, get the class instance
|
# finally, get the class instance
|
||||||
instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)()
|
instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)()
|
||||||
if not getattr(instance, 'name', None):
|
|
||||||
instance.name = self.name
|
|
||||||
|
|
||||||
if not getattr(instance, 'display_name', None):
|
|
||||||
instance.display_name = self.display_name
|
|
||||||
|
|
||||||
self._instance = instance
|
|
||||||
|
|
||||||
|
# set the name, display name and module factory
|
||||||
|
instance.name = self.name
|
||||||
|
instance.display_name = self.display_name
|
||||||
|
instance.module_factory = self.module_factory
|
||||||
|
|
||||||
# merge the default config with the user config
|
# merge the default config with the user config
|
||||||
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
|
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
|
||||||
|
|
||||||
config[self.name] = default_config | config.get(self.name, {})
|
config[self.name] = default_config | config.get(self.name, {})
|
||||||
instance.config_setup(config)
|
instance.config_setup(config)
|
||||||
instance.setup()
|
instance.setup()
|
||||||
|
|
||||||
|
# save the instance for future easy loading
|
||||||
|
self._instance = instance
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from typing import Generator, Union, List, Type
|
from typing import Generator, Union, List, Type, TYPE_CHECKING
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from ipaddress import ip_address
|
from ipaddress import ip_address
|
||||||
from copy import copy
|
from copy import copy
|
||||||
|
@ -22,12 +22,14 @@ from rich_argparse import RichHelpFormatter
|
||||||
from .metadata import Metadata, Media
|
from .metadata import Metadata, Media
|
||||||
from auto_archiver.version import __version__
|
from auto_archiver.version import __version__
|
||||||
from .config import _yaml, read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG, DefaultValidatingParser
|
from .config import _yaml, read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG, DefaultValidatingParser
|
||||||
from .module import available_modules, LazyBaseModule, get_module, setup_paths
|
from .module import ModuleFactory, LazyBaseModule
|
||||||
from . import validators, Feeder, Extractor, Database, Storage, Formatter, Enricher
|
from . import validators, Feeder, Extractor, Database, Storage, Formatter, Enricher
|
||||||
from .module import BaseModule
|
from .consts import MODULE_TYPES
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from .base_module import BaseModule
|
||||||
|
from .module import LazyBaseModule
|
||||||
|
|
||||||
DEFAULT_CONFIG_FILE = "orchestration.yaml"
|
DEFAULT_CONFIG_FILE = "orchestration.yaml"
|
||||||
|
|
||||||
|
@ -95,6 +97,12 @@ class UniqueAppendAction(argparse.Action):
|
||||||
|
|
||||||
class ArchivingOrchestrator:
|
class ArchivingOrchestrator:
|
||||||
|
|
||||||
|
# instance variables
|
||||||
|
module_factory: ModuleFactory
|
||||||
|
setup_finished: bool
|
||||||
|
logger_id: int
|
||||||
|
|
||||||
|
# instance variables, used for convenience to access modules by step
|
||||||
feeders: List[Type[Feeder]]
|
feeders: List[Type[Feeder]]
|
||||||
extractors: List[Type[Extractor]]
|
extractors: List[Type[Extractor]]
|
||||||
enrichers: List[Type[Enricher]]
|
enrichers: List[Type[Enricher]]
|
||||||
|
@ -102,6 +110,11 @@ class ArchivingOrchestrator:
|
||||||
storages: List[Type[Storage]]
|
storages: List[Type[Storage]]
|
||||||
formatters: List[Type[Formatter]]
|
formatters: List[Type[Formatter]]
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.module_factory = ModuleFactory()
|
||||||
|
self.setup_finished = False
|
||||||
|
self.logger_id = None
|
||||||
|
|
||||||
def setup_basic_parser(self):
|
def setup_basic_parser(self):
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="auto-archiver",
|
prog="auto-archiver",
|
||||||
|
@ -133,7 +146,7 @@ class ArchivingOrchestrator:
|
||||||
)
|
)
|
||||||
self.add_modules_args(modules_parser)
|
self.add_modules_args(modules_parser)
|
||||||
cli_modules, unused_args = modules_parser.parse_known_args(unused_args)
|
cli_modules, unused_args = modules_parser.parse_known_args(unused_args)
|
||||||
for module_type in BaseModule.MODULE_TYPES:
|
for module_type in MODULE_TYPES:
|
||||||
yaml_config['steps'][f"{module_type}s"] = getattr(cli_modules, f"{module_type}s", []) or yaml_config['steps'].get(f"{module_type}s", [])
|
yaml_config['steps'][f"{module_type}s"] = getattr(cli_modules, f"{module_type}s", []) or yaml_config['steps'].get(f"{module_type}s", [])
|
||||||
|
|
||||||
parser = DefaultValidatingParser(
|
parser = DefaultValidatingParser(
|
||||||
|
@ -155,15 +168,15 @@ class ArchivingOrchestrator:
|
||||||
# TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
|
# TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
|
||||||
enabled_modules = []
|
enabled_modules = []
|
||||||
# first loads the modules from the config file, then from the command line
|
# first loads the modules from the config file, then from the command line
|
||||||
for module_type in BaseModule.MODULE_TYPES:
|
for module_type in MODULE_TYPES:
|
||||||
enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
|
enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
|
||||||
|
|
||||||
# clear out duplicates, but keep the order
|
# clear out duplicates, but keep the order
|
||||||
enabled_modules = list(dict.fromkeys(enabled_modules))
|
enabled_modules = list(dict.fromkeys(enabled_modules))
|
||||||
avail_modules = available_modules(with_manifest=True, limit_to_modules=enabled_modules, suppress_warnings=True)
|
avail_modules = self.module_factory.available_modules(limit_to_modules=enabled_modules, suppress_warnings=True)
|
||||||
self.add_individual_module_args(avail_modules, parser)
|
self.add_individual_module_args(avail_modules, parser)
|
||||||
elif basic_config.mode == 'simple':
|
elif basic_config.mode == 'simple':
|
||||||
simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
|
simple_modules = [module for module in self.module_factory.available_modules() if not module.requires_setup]
|
||||||
self.add_individual_module_args(simple_modules, parser)
|
self.add_individual_module_args(simple_modules, parser)
|
||||||
|
|
||||||
# for simple mode, we use the cli_feeder and any modules that don't require setup
|
# for simple mode, we use the cli_feeder and any modules that don't require setup
|
||||||
|
@ -176,7 +189,7 @@ class ArchivingOrchestrator:
|
||||||
yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
|
yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
|
||||||
else:
|
else:
|
||||||
# load all modules, they're not using the 'simple' mode
|
# load all modules, they're not using the 'simple' mode
|
||||||
self.add_individual_module_args(available_modules(with_manifest=True), parser)
|
self.add_individual_module_args(self.module_factory.available_modules(), parser)
|
||||||
|
|
||||||
parser.set_defaults(**to_dot_notation(yaml_config))
|
parser.set_defaults(**to_dot_notation(yaml_config))
|
||||||
|
|
||||||
|
@ -206,7 +219,7 @@ class ArchivingOrchestrator:
|
||||||
parser = self.parser
|
parser = self.parser
|
||||||
|
|
||||||
# Module loading from the command line
|
# Module loading from the command line
|
||||||
for module_type in BaseModule.MODULE_TYPES:
|
for module_type in MODULE_TYPES:
|
||||||
parser.add_argument(f'--{module_type}s', dest=f'{module_type}s', nargs='+', help=f'the {module_type}s to use', default=[], action=UniqueAppendAction)
|
parser.add_argument(f'--{module_type}s', dest=f'{module_type}s', nargs='+', help=f'the {module_type}s to use', default=[], action=UniqueAppendAction)
|
||||||
|
|
||||||
def add_additional_args(self, parser: argparse.ArgumentParser = None):
|
def add_additional_args(self, parser: argparse.ArgumentParser = None):
|
||||||
|
@ -232,7 +245,7 @@ class ArchivingOrchestrator:
|
||||||
def add_individual_module_args(self, modules: list[LazyBaseModule] = None, parser: argparse.ArgumentParser = None) -> None:
|
def add_individual_module_args(self, modules: list[LazyBaseModule] = None, parser: argparse.ArgumentParser = None) -> None:
|
||||||
|
|
||||||
if not modules:
|
if not modules:
|
||||||
modules = available_modules(with_manifest=True)
|
modules = self.module_factory.available_modules()
|
||||||
|
|
||||||
for module in modules:
|
for module in modules:
|
||||||
|
|
||||||
|
@ -274,11 +287,18 @@ class ArchivingOrchestrator:
|
||||||
|
|
||||||
def setup_logging(self, config):
|
def setup_logging(self, config):
|
||||||
# setup loguru logging
|
# setup loguru logging
|
||||||
logger.remove(0) # remove the default logger
|
try:
|
||||||
|
logger.remove(0) # remove the default logger
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
logging_config = config['logging']
|
logging_config = config['logging']
|
||||||
logger.add(sys.stderr, level=logging_config['level'])
|
|
||||||
if log_file := logging_config['file']:
|
# add other logging info
|
||||||
logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
|
if self.logger_id is None: # note - need direct comparison to None since need to consider falsy value 0
|
||||||
|
self.logger_id = logger.add(sys.stderr, level=logging_config['level'])
|
||||||
|
if log_file := logging_config['file']:
|
||||||
|
logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
|
||||||
|
|
||||||
def install_modules(self, modules_by_type):
|
def install_modules(self, modules_by_type):
|
||||||
"""
|
"""
|
||||||
|
@ -288,7 +308,7 @@ class ArchivingOrchestrator:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
invalid_modules = []
|
invalid_modules = []
|
||||||
for module_type in BaseModule.MODULE_TYPES:
|
for module_type in MODULE_TYPES:
|
||||||
|
|
||||||
step_items = []
|
step_items = []
|
||||||
modules_to_load = modules_by_type[f"{module_type}s"]
|
modules_to_load = modules_by_type[f"{module_type}s"]
|
||||||
|
@ -333,7 +353,7 @@ class ArchivingOrchestrator:
|
||||||
if module in invalid_modules:
|
if module in invalid_modules:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
loaded_module: BaseModule = get_module(module, self.config)
|
loaded_module: BaseModule = self.module_factory.get_module(module, self.config)
|
||||||
except (KeyboardInterrupt, Exception) as e:
|
except (KeyboardInterrupt, Exception) as e:
|
||||||
logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}")
|
logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}")
|
||||||
if module_type == 'extractor' and loaded_module.name == module:
|
if module_type == 'extractor' and loaded_module.name == module:
|
||||||
|
@ -359,14 +379,17 @@ class ArchivingOrchestrator:
|
||||||
def setup_config(self, args: list) -> dict:
|
def setup_config(self, args: list) -> dict:
|
||||||
"""
|
"""
|
||||||
Sets up the configuration file, merging the default config with the user's config
|
Sets up the configuration file, merging the default config with the user's config
|
||||||
|
|
||||||
|
This function should only ever be run once.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.setup_basic_parser()
|
self.setup_basic_parser()
|
||||||
|
|
||||||
# parse the known arguments for now (basically, we want the config file)
|
# parse the known arguments for now (basically, we want the config file)
|
||||||
basic_config, unused_args = self.basic_parser.parse_known_args(args)
|
basic_config, unused_args = self.basic_parser.parse_known_args(args)
|
||||||
|
|
||||||
# setup any custom module paths, so they'll show in the help and for arg parsing
|
# setup any custom module paths, so they'll show in the help and for arg parsing
|
||||||
setup_paths(basic_config.module_paths)
|
self.module_factory.setup_paths(basic_config.module_paths)
|
||||||
|
|
||||||
# if help flag was called, then show the help
|
# if help flag was called, then show the help
|
||||||
if basic_config.help:
|
if basic_config.help:
|
||||||
|
@ -378,16 +401,29 @@ class ArchivingOrchestrator:
|
||||||
|
|
||||||
def setup(self, args: list):
|
def setup(self, args: list):
|
||||||
"""
|
"""
|
||||||
Main entry point for the orchestrator, sets up the basic parser, loads the config file, and sets up the complete parser
|
Function to configure all setup of the orchestrator: setup configs and load modules.
|
||||||
|
|
||||||
|
This method should only ever be called once
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if self.setup_finished:
|
||||||
|
logger.warning("The `setup_config()` function should only ever be run once. \
|
||||||
|
If you need to re-run the setup, please re-instantiate a new instance of the orchestrator. \
|
||||||
|
For code implementations, you should call .setup_config() once then you may call .feed() \
|
||||||
|
multiple times to archive multiple URLs.")
|
||||||
|
return
|
||||||
|
|
||||||
|
self.setup_basic_parser()
|
||||||
self.config = self.setup_config(args)
|
self.config = self.setup_config(args)
|
||||||
|
|
||||||
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
||||||
self.install_modules(self.config['steps'])
|
self.install_modules(self.config['steps'])
|
||||||
|
|
||||||
# log out the modules that were loaded
|
# log out the modules that were loaded
|
||||||
for module_type in BaseModule.MODULE_TYPES:
|
for module_type in MODULE_TYPES:
|
||||||
logger.info(f"{module_type.upper()}S: " + ", ".join(m.display_name for m in getattr(self, f"{module_type}s")))
|
logger.info(f"{module_type.upper()}S: " + ", ".join(m.display_name for m in getattr(self, f"{module_type}s")))
|
||||||
|
|
||||||
|
self.setup_finished = True
|
||||||
|
|
||||||
def _command_line_run(self, args: list) -> Generator[Metadata]:
|
def _command_line_run(self, args: list) -> Generator[Metadata]:
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -14,7 +14,7 @@ from auto_archiver.utils.misc import random_str
|
||||||
|
|
||||||
from auto_archiver.core import Media, BaseModule, Metadata
|
from auto_archiver.core import Media, BaseModule, Metadata
|
||||||
from auto_archiver.modules.hash_enricher.hash_enricher import HashEnricher
|
from auto_archiver.modules.hash_enricher.hash_enricher import HashEnricher
|
||||||
from auto_archiver.core.module import get_module
|
|
||||||
class Storage(BaseModule):
|
class Storage(BaseModule):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -74,7 +74,7 @@ class Storage(BaseModule):
|
||||||
filename = random_str(24)
|
filename = random_str(24)
|
||||||
elif filename_generator == "static":
|
elif filename_generator == "static":
|
||||||
# load the hash_enricher module
|
# load the hash_enricher module
|
||||||
he = get_module(HashEnricher, self.config)
|
he = self.module_factory.get_module(HashEnricher, self.config)
|
||||||
hd = he.calculate_hash(media.filename)
|
hd = he.calculate_hash(media.filename)
|
||||||
filename = hd[:24]
|
filename = hd[:24]
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -10,7 +10,6 @@ from auto_archiver.version import __version__
|
||||||
from auto_archiver.core import Metadata, Media
|
from auto_archiver.core import Metadata, Media
|
||||||
from auto_archiver.core import Formatter
|
from auto_archiver.core import Formatter
|
||||||
from auto_archiver.utils.misc import random_str
|
from auto_archiver.utils.misc import random_str
|
||||||
from auto_archiver.core.module import get_module
|
|
||||||
|
|
||||||
class HtmlFormatter(Formatter):
|
class HtmlFormatter(Formatter):
|
||||||
environment: Environment = None
|
environment: Environment = None
|
||||||
|
@ -50,7 +49,7 @@ class HtmlFormatter(Formatter):
|
||||||
final_media = Media(filename=html_path, _mimetype="text/html")
|
final_media = Media(filename=html_path, _mimetype="text/html")
|
||||||
|
|
||||||
# get the already instantiated hash_enricher module
|
# get the already instantiated hash_enricher module
|
||||||
he = get_module('hash_enricher', self.config)
|
he = self.module_factory.get_module('hash_enricher', self.config)
|
||||||
if len(hd := he.calculate_hash(final_media.filename)):
|
if len(hd := he.calculate_hash(final_media.filename)):
|
||||||
final_media.set("hash", f"{he.algorithm}:{hd}")
|
final_media.set("hash", f"{he.algorithm}:{hd}")
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,6 @@ from loguru import logger
|
||||||
|
|
||||||
from auto_archiver.core import Enricher
|
from auto_archiver.core import Enricher
|
||||||
from auto_archiver.core import Metadata, Media
|
from auto_archiver.core import Metadata, Media
|
||||||
from auto_archiver.core.module import get_module
|
|
||||||
|
|
||||||
class WhisperEnricher(Enricher):
|
class WhisperEnricher(Enricher):
|
||||||
"""
|
"""
|
||||||
|
@ -15,7 +14,7 @@ class WhisperEnricher(Enricher):
|
||||||
|
|
||||||
def setup(self) -> None:
|
def setup(self) -> None:
|
||||||
self.stores = self.config['steps']['storages']
|
self.stores = self.config['steps']['storages']
|
||||||
self.s3 = get_module("s3_storage", self.config)
|
self.s3 = self.module_factory.get_module("s3_storage", self.config)
|
||||||
if not "s3_storage" in self.stores:
|
if not "s3_storage" in self.stores:
|
||||||
logger.error("WhisperEnricher: To use the WhisperEnricher you need to use S3Storage so files are accessible publicly to the whisper service being called.")
|
logger.error("WhisperEnricher: To use the WhisperEnricher you need to use S3Storage so files are accessible publicly to the whisper service being called.")
|
||||||
return
|
return
|
||||||
|
|
|
@ -10,7 +10,7 @@ import hashlib
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from auto_archiver.core.metadata import Metadata
|
from auto_archiver.core.metadata import Metadata
|
||||||
from auto_archiver.core.module import get_module, _LAZY_LOADED_MODULES
|
from auto_archiver.core.module import ModuleFactory
|
||||||
|
|
||||||
# Test names inserted into this list will be run last. This is useful for expensive/costly tests
|
# Test names inserted into this list will be run last. This is useful for expensive/costly tests
|
||||||
# that you only want to run if everything else succeeds (e.g. API calls). The order here is important
|
# that you only want to run if everything else succeeds (e.g. API calls). The order here is important
|
||||||
|
@ -22,19 +22,19 @@ TESTS_TO_RUN_LAST = ['test_twitter_api_archiver']
|
||||||
def setup_module(request):
|
def setup_module(request):
|
||||||
def _setup_module(module_name, config={}):
|
def _setup_module(module_name, config={}):
|
||||||
|
|
||||||
|
module_factory = ModuleFactory()
|
||||||
|
|
||||||
if isinstance(module_name, type):
|
if isinstance(module_name, type):
|
||||||
# get the module name:
|
# get the module name:
|
||||||
# if the class does not have a .name, use the name of the parent folder
|
# if the class does not have a .name, use the name of the parent folder
|
||||||
module_name = module_name.__module__.rsplit(".",2)[-2]
|
module_name = module_name.__module__.rsplit(".",2)[-2]
|
||||||
|
|
||||||
m = get_module(module_name, {module_name: config})
|
m = module_factory.get_module(module_name, {module_name: config})
|
||||||
|
|
||||||
# add the tmp_dir to the module
|
# add the tmp_dir to the module
|
||||||
tmp_dir = TemporaryDirectory()
|
tmp_dir = TemporaryDirectory()
|
||||||
m.tmp_dir = tmp_dir.name
|
m.tmp_dir = tmp_dir.name
|
||||||
|
|
||||||
def cleanup():
|
def cleanup():
|
||||||
_LAZY_LOADED_MODULES.pop(module_name)
|
|
||||||
tmp_dir.cleanup()
|
tmp_dir.cleanup()
|
||||||
request.addfinalizer(cleanup)
|
request.addfinalizer(cleanup)
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ import pytest
|
||||||
|
|
||||||
from auto_archiver.modules.hash_enricher import HashEnricher
|
from auto_archiver.modules.hash_enricher import HashEnricher
|
||||||
from auto_archiver.core import Metadata, Media
|
from auto_archiver.core import Metadata, Media
|
||||||
from auto_archiver.core.module import get_module_lazy
|
from auto_archiver.core.module import ModuleFactory
|
||||||
|
|
||||||
@pytest.mark.parametrize("algorithm, filename, expected_hash", [
|
@pytest.mark.parametrize("algorithm, filename, expected_hash", [
|
||||||
("SHA-256", "tests/data/testfile_1.txt", "1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"),
|
("SHA-256", "tests/data/testfile_1.txt", "1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"),
|
||||||
|
@ -22,7 +22,7 @@ def test_default_config_values(setup_module):
|
||||||
|
|
||||||
def test_config():
|
def test_config():
|
||||||
# test default config
|
# test default config
|
||||||
c = get_module_lazy('hash_enricher').configs
|
c = ModuleFactory().get_module_lazy('hash_enricher').configs
|
||||||
assert c["algorithm"]["default"] == "SHA-256"
|
assert c["algorithm"]["default"] == "SHA-256"
|
||||||
assert c["chunksize"]["default"] == 16000000
|
assert c["chunksize"]["default"] == 16000000
|
||||||
assert c["algorithm"]["choices"] == ["SHA-256", "SHA3-512"]
|
assert c["algorithm"]["choices"] == ["SHA-256", "SHA3-512"]
|
||||||
|
|
|
@ -1,24 +1,18 @@
|
||||||
import sys
|
import sys
|
||||||
import pytest
|
import pytest
|
||||||
from auto_archiver.core.module import get_module_lazy, BaseModule, LazyBaseModule, _LAZY_LOADED_MODULES
|
from auto_archiver.core.module import ModuleFactory, LazyBaseModule
|
||||||
|
from auto_archiver.core.base_module import BaseModule
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def example_module():
|
def example_module():
|
||||||
import auto_archiver
|
import auto_archiver
|
||||||
|
|
||||||
|
module_factory = ModuleFactory()
|
||||||
|
|
||||||
previous_path = auto_archiver.modules.__path__
|
previous_path = auto_archiver.modules.__path__
|
||||||
auto_archiver.modules.__path__.append("tests/data/test_modules/")
|
auto_archiver.modules.__path__.append("tests/data/test_modules/")
|
||||||
|
|
||||||
module = get_module_lazy("example_module")
|
return module_factory.get_module_lazy("example_module")
|
||||||
yield module
|
|
||||||
# cleanup
|
|
||||||
try:
|
|
||||||
del module._manifest
|
|
||||||
except AttributeError:
|
|
||||||
pass
|
|
||||||
del _LAZY_LOADED_MODULES["example_module"]
|
|
||||||
sys.modules.pop("auto_archiver.modules.example_module.example_module", None)
|
|
||||||
auto_archiver.modules.__path__ = previous_path
|
|
||||||
|
|
||||||
def test_get_module_lazy(example_module):
|
def test_get_module_lazy(example_module):
|
||||||
assert example_module.name == "example_module"
|
assert example_module.name == "example_module"
|
||||||
|
@ -46,12 +40,14 @@ def test_module_dependency_check_loads_module(example_module):
|
||||||
# monkey patch the manifest to include a nonexistent dependency
|
# monkey patch the manifest to include a nonexistent dependency
|
||||||
example_module.manifest["dependencies"]["python"] = ["hash_enricher"]
|
example_module.manifest["dependencies"]["python"] = ["hash_enricher"]
|
||||||
|
|
||||||
|
module_factory = example_module.module_factory
|
||||||
|
|
||||||
loaded_module = example_module.load({})
|
loaded_module = example_module.load({})
|
||||||
assert loaded_module is not None
|
assert loaded_module is not None
|
||||||
|
|
||||||
# check the dependency is loaded
|
# check the dependency is loaded
|
||||||
assert _LAZY_LOADED_MODULES["hash_enricher"] is not None
|
assert module_factory._lazy_modules["hash_enricher"] is not None
|
||||||
assert _LAZY_LOADED_MODULES["hash_enricher"]._instance is not None
|
assert module_factory._lazy_modules["hash_enricher"]._instance is not None
|
||||||
|
|
||||||
def test_load_module(example_module):
|
def test_load_module(example_module):
|
||||||
|
|
||||||
|
@ -69,7 +65,7 @@ def test_load_module(example_module):
|
||||||
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
||||||
def test_load_modules(module_name):
|
def test_load_modules(module_name):
|
||||||
# test that specific modules can be loaded
|
# test that specific modules can be loaded
|
||||||
module = get_module_lazy(module_name)
|
module = ModuleFactory().get_module_lazy(module_name)
|
||||||
assert module is not None
|
assert module is not None
|
||||||
assert isinstance(module, LazyBaseModule)
|
assert isinstance(module, LazyBaseModule)
|
||||||
assert module.name == module_name
|
assert module.name == module_name
|
||||||
|
@ -86,7 +82,7 @@ def test_load_modules(module_name):
|
||||||
|
|
||||||
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
||||||
def test_lazy_base_module(module_name):
|
def test_lazy_base_module(module_name):
|
||||||
lazy_module = get_module_lazy(module_name)
|
lazy_module = ModuleFactory().get_module_lazy(module_name)
|
||||||
|
|
||||||
assert lazy_module is not None
|
assert lazy_module is not None
|
||||||
assert isinstance(lazy_module, LazyBaseModule)
|
assert isinstance(lazy_module, LazyBaseModule)
|
||||||
|
|
|
@ -4,7 +4,7 @@ from argparse import ArgumentParser, ArgumentTypeError
|
||||||
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
||||||
from auto_archiver.version import __version__
|
from auto_archiver.version import __version__
|
||||||
from auto_archiver.core.config import read_yaml, store_yaml
|
from auto_archiver.core.config import read_yaml, store_yaml
|
||||||
from auto_archiver.core.module import _LAZY_LOADED_MODULES
|
|
||||||
|
|
||||||
TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
|
TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
|
||||||
TEST_MODULES = "tests/data/test_modules/"
|
TEST_MODULES = "tests/data/test_modules/"
|
||||||
|
@ -17,22 +17,7 @@ def test_args():
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def orchestrator():
|
def orchestrator():
|
||||||
yield ArchivingOrchestrator()
|
return ArchivingOrchestrator()
|
||||||
# hack - the loguru logger starts with one logger, but if orchestrator has run before
|
|
||||||
# it'll remove the default logger, add it back in:
|
|
||||||
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
if not logger._core.handlers.get(0):
|
|
||||||
logger._core.handlers_count = 0
|
|
||||||
logger.add(sys.stderr)
|
|
||||||
# and remove the custom logger
|
|
||||||
if logger._core.handlers.get(1):
|
|
||||||
logger.remove(1)
|
|
||||||
|
|
||||||
# delete out any loaded modules
|
|
||||||
_LAZY_LOADED_MODULES.clear()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def basic_parser(orchestrator) -> ArgumentParser:
|
def basic_parser(orchestrator) -> ArgumentParser:
|
||||||
|
|
Ładowanie…
Reference in New Issue