kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix up dependency checking (use 'dependencies' instead of 'external_dependencies' -> simpler/easier to remember)
rodzic
3d37c494aa
commit
00a7018f36
|
@ -143,6 +143,7 @@ def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= []
|
|||
if _LAZY_LOADED_MODULES.get(possible_module):
|
||||
continue
|
||||
lazy_module = LazyBaseModule(possible_module, possible_module_path)
|
||||
|
||||
_LAZY_LOADED_MODULES[possible_module] = lazy_module
|
||||
|
||||
all_modules.append(lazy_module)
|
||||
|
@ -229,6 +230,9 @@ class LazyBaseModule:
|
|||
# check external dependencies are installed
|
||||
def check_deps(deps, check):
|
||||
for dep in deps:
|
||||
if not len(dep):
|
||||
# clear out any empty strings that a user may have erroneously added
|
||||
continue
|
||||
if not check(dep):
|
||||
logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
|
||||
exit(1)
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"type": ["database"],
|
||||
"entry_point": "api_db:AAApiDb",
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["requests",
|
||||
"loguru"],
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "atlos_storage",
|
||||
"type": ["storage"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {"python": ["loguru", "requests"], "bin": [""]},
|
||||
"dependencies": {"python": ["loguru", "requests"], "bin": [""]},
|
||||
"configs": {
|
||||
"path_generator": {
|
||||
"default": "url",
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"type": ["database"],
|
||||
"entry_point": "atlos_db:AtlosDb",
|
||||
"requires_setup": True,
|
||||
"external_dependencies":
|
||||
"dependencies":
|
||||
{"python": ["loguru",
|
||||
""],
|
||||
"bin": [""]},
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Atlos Feeder",
|
||||
"type": ["feeder"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "requests"],
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "CLI Feeder",
|
||||
"type": ["feeder"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru"],
|
||||
},
|
||||
'entry_point': 'cli_feeder::CLIFeeder',
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Console Database",
|
||||
"type": ["database"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru"],
|
||||
},
|
||||
"description": """
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "CSV Database",
|
||||
"type": ["database"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {"python": ["loguru"]
|
||||
"dependencies": {"python": ["loguru"]
|
||||
},
|
||||
'entry_point': 'csv_db::CSVDb',
|
||||
"configs": {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "CSV Feeder",
|
||||
"type": ["feeder"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru"],
|
||||
"bin": [""]
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Google Drive Storage",
|
||||
"type": ["storage"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": [
|
||||
"loguru",
|
||||
"google-api-python-client",
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"type": ["database"],
|
||||
"entry_point": "gsheet_db::GsheetsDb",
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "gspread", "python-slugify"],
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"type": ["feeder"],
|
||||
"entry_point": "gsheet_feeder::GsheetsFeeder",
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "gspread", "python-slugify"],
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Hash Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru"],
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
"name": "HTML Formatter",
|
||||
"type": ["formatter"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"python": ["loguru", "jinja2"],
|
||||
"dependencies": {
|
||||
"python": ["hash_enricher", "loguru", "jinja2"],
|
||||
"bin": [""]
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -53,6 +53,7 @@ class HtmlFormatter(Formatter):
|
|||
outf.write(content)
|
||||
final_media = Media(filename=html_path, _mimetype="text/html")
|
||||
|
||||
# get the already instantiated hash_enricher module
|
||||
he = get_module('hash_enricher', self.config)
|
||||
if len(hd := he.calculate_hash(final_media.filename)):
|
||||
final_media.set("hash", f"{he.algorithm}:{hd}")
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "Instagram API Extractor",
|
||||
"type": ["extractor"],
|
||||
"external_dependencies":
|
||||
"dependencies":
|
||||
{"python": ["requests",
|
||||
"loguru",
|
||||
"retrying",
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "Instagram Extractor",
|
||||
"type": ["extractor"],
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": [
|
||||
"instaloader",
|
||||
"loguru",
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "Instagram Telegram Bot Extractor",
|
||||
"type": ["extractor"],
|
||||
"external_dependencies": {"python": ["loguru",
|
||||
"dependencies": {"python": ["loguru",
|
||||
"telethon",],
|
||||
},
|
||||
"requires_setup": True,
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Local Storage",
|
||||
"type": ["storage"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru"],
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Archive Metadata Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru"],
|
||||
},
|
||||
"description": """
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Media Metadata Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru"],
|
||||
"bin": ["exiftool"]
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Mute Formatter",
|
||||
"type": ["formatter"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
},
|
||||
"description": """ Default formatter.
|
||||
""",
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "PDQ Hash Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "pdqhash", "numpy", "Pillow"],
|
||||
},
|
||||
"description": """
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "S3 Storage",
|
||||
"type": ["storage"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["boto3", "loguru"],
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Screenshot Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "selenium"],
|
||||
"bin": ["chromedriver"]
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "SSL Certificate Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "python-slugify"],
|
||||
},
|
||||
'entry_point': 'ssl_enricher::SSLEnricher',
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Telegram Extractor",
|
||||
"type": ["extractor"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": [
|
||||
"requests",
|
||||
"bs4",
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "telethon_extractor",
|
||||
"type": ["extractor"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["telethon",
|
||||
"loguru",
|
||||
"tqdm",
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Thumbnail Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "ffmpeg-python"],
|
||||
"bin": ["ffmpeg"]
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Timestamping Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": [
|
||||
"loguru",
|
||||
"slugify",
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Twitter API Extractor",
|
||||
"type": ["extractor"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["requests",
|
||||
"loguru",
|
||||
"pytwitter",
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"type": ["extractor"],
|
||||
"requires_setup": True,
|
||||
"depends": ["core", "utils"],
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru",
|
||||
"vk_url_scraper"],
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "WACZ Enricher",
|
||||
"type": ["enricher", "archiver"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": [
|
||||
"loguru",
|
||||
"jsonlines",
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Wayback Machine Enricher",
|
||||
"type": ["enricher", "archiver"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "requests"],
|
||||
},
|
||||
"entry_point": "wayback_enricher::WaybackExtractorEnricher",
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Whisper Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": True,
|
||||
"external_dependencies": {
|
||||
"dependencies": {
|
||||
"python": ["loguru", "requests"],
|
||||
},
|
||||
"configs": {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "Example Module",
|
||||
"type": ["extractor"],
|
||||
"requires_setup": False,
|
||||
"external_dependencies": {"python": ["loguru"]
|
||||
"dependencies": {"python": ["loguru"]
|
||||
},
|
||||
"configs": {
|
||||
"csv_file": {"default": "db.csv", "help": "CSV file name"}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from auto_archiver.core.extractor import Extractor
|
||||
|
||||
class ExampleModule(Extractor):
|
||||
pass
|
||||
def download(self, item):
|
||||
print("do something")
|
|
@ -1,13 +1,24 @@
|
|||
import sys
|
||||
import pytest
|
||||
from auto_archiver.core.module import get_module_lazy, BaseModule, LazyBaseModule, _LAZY_LOADED_MODULES
|
||||
from auto_archiver.core.extractor import Extractor
|
||||
|
||||
@pytest.fixture
|
||||
def example_module():
|
||||
yield get_module_lazy("example_module", ["tests/data/"])
|
||||
import auto_archiver
|
||||
|
||||
previous_path = auto_archiver.modules.__path__
|
||||
auto_archiver.modules.__path__.append("tests/data/")
|
||||
|
||||
module = get_module_lazy("example_module")
|
||||
yield module
|
||||
# cleanup
|
||||
_LAZY_LOADED_MODULES.pop("example_module")
|
||||
try:
|
||||
del module._manifest
|
||||
except AttributeError:
|
||||
pass
|
||||
del _LAZY_LOADED_MODULES["example_module"]
|
||||
sys.modules.pop("auto_archiver.modules.example_module.example_module", None)
|
||||
auto_archiver.modules.__path__ = previous_path
|
||||
|
||||
def test_get_module_lazy(example_module):
|
||||
assert example_module.name == "example_module"
|
||||
|
@ -15,18 +26,34 @@ def test_get_module_lazy(example_module):
|
|||
|
||||
assert example_module.manifest is not None
|
||||
|
||||
def test_python_dependency_check(example_module):
|
||||
# example_module requires loguru, which is not installed
|
||||
# monkey patch the manifest to include a nonexistent dependency
|
||||
example_module.manifest["dependencies"]["python"] = ["does_not_exist"]
|
||||
|
||||
def test_load_module_abc_check(example_module):
|
||||
|
||||
# example_module is an extractor but doesn't have the 'download' method, should raise an ABC error
|
||||
with pytest.raises(TypeError) as load_error:
|
||||
with pytest.raises(SystemExit) as load_error:
|
||||
example_module.load({})
|
||||
assert "Can't instantiate abstract class ExampleModule with abstract method download" in str(load_error.value)
|
||||
|
||||
|
||||
def test_load_module(example_module, monkeypatch):
|
||||
# hack - remove the 'download' method from the required methods of Extractor
|
||||
monkeypatch.setattr(Extractor, "__abstractmethods__", set())
|
||||
assert load_error.value.code == 1
|
||||
|
||||
def test_binary_dependency_check(example_module):
|
||||
# example_module requires ffmpeg, which is not installed
|
||||
# monkey patch the manifest to include a nonexistent dependency
|
||||
example_module.manifest["dependencies"]["binary"] = ["does_not_exist"]
|
||||
|
||||
def test_module_dependency_check_loads_module(example_module):
|
||||
# example_module is patched to depend on the hash_enricher module
|
||||
# monkey patch the manifest to list another module as a dependency
|
||||
example_module.manifest["dependencies"]["python"] = ["hash_enricher"]
|
||||
|
||||
loaded_module = example_module.load({})
|
||||
assert loaded_module is not None
|
||||
|
||||
# check the dependency is loaded
|
||||
assert _LAZY_LOADED_MODULES["hash_enricher"] is not None
|
||||
assert _LAZY_LOADED_MODULES["hash_enricher"]._instance is not None
|
||||
|
||||
def test_load_module(example_module):
|
||||
|
||||
# setup the module, and check that config is set to the default values
|
||||
loaded_module = example_module.load({})
|
||||
|
|
Ładowanie…
Reference in New Issue