# Mirror of https://github.com/bellingcat/auto-archiver
# 260 lines, 8.1 KiB, Python
import pytest
|
|
from argparse import ArgumentParser, ArgumentTypeError
|
|
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
|
from auto_archiver.version import __version__
|
|
from auto_archiver.core.config import read_yaml, store_yaml
|
|
from auto_archiver.core import Metadata
|
|
from auto_archiver.core.consts import SetupError
|
|
|
|
# Path of the YAML orchestration config passed to the parser via "--config".
TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
# Directory passed via "--module_paths" so that test-only modules can be found.
TEST_MODULES = "tests/data/test_modules/"
|
|
|
|
|
|
@pytest.fixture
def test_args():
    """Return the baseline command-line arguments shared by most tests.

    The required-field option and its value are deliberately the last two
    entries, so a test can drop them again by slicing them off the end.
    """
    config_option = ["--config", TEST_ORCHESTRATION]
    module_paths_option = ["--module_paths", TEST_MODULES]
    # just set this for normal testing, we will remove it later
    required_field_option = ["--example_module.required_field", "some_value"]
    return config_option + module_paths_option + required_field_option
|
|
|
|
|
|
@pytest.fixture
def orchestrator():
    """Provide a newly constructed ArchivingOrchestrator for each test."""
    fresh_orchestrator = ArchivingOrchestrator()
    return fresh_orchestrator
|
|
|
|
|
|
@pytest.fixture
def basic_parser(orchestrator) -> ArgumentParser:
    """Provide the parser returned by the orchestrator's setup_basic_parser()."""
    parser = orchestrator.setup_basic_parser()
    return parser
|
|
|
|
|
|
def test_setup_orchestrator(orchestrator):
    """The orchestrator fixture must yield an actual object, never None."""
    is_missing = orchestrator is None
    assert not is_missing
|
|
|
|
|
|
def test_parse_config():
    """Placeholder: no assertions yet, so this test always passes.

    TODO: add real coverage for config parsing.
    """
|
|
|
|
|
|
def test_parse_basic(basic_parser):
    """The value given to "--config" must be stored as `config_file`."""
    cli_args = ["--config", TEST_ORCHESTRATION]
    parsed = basic_parser.parse_args(cli_args)
    assert parsed.config_file == TEST_ORCHESTRATION
|
|
|
|
|
|
@pytest.mark.parametrize("mode", ["simple", "full"])
def test_mode(basic_parser, mode):
    """Each supported "--mode" value must be parsed back unchanged."""
    cli_args = ["--mode", mode]
    parsed = basic_parser.parse_args(cli_args)
    assert parsed.mode == mode
|
|
|
|
|
|
def test_mode_invalid(basic_parser, capsys):
    """An unknown "--mode" value must exit with code 2 and report the bad choice."""
    bad_args = ["--mode", "invalid"]
    with pytest.raises(SystemExit) as exit_error:
        basic_parser.parse_args(bad_args)

    # Checked after the `with` block: nothing following the raising call
    # inside the block would ever run.
    assert exit_error.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "invalid choice" in stderr_text
|
|
|
|
|
|
def test_version(basic_parser, capsys):
    """"--version" must print the package version on stdout and exit with code 0."""
    with pytest.raises(SystemExit) as exit_error:
        basic_parser.parse_args(["--version"])

    assert exit_error.value.code == 0
    printed = capsys.readouterr().out
    assert printed == f"{__version__}\n"
|
|
|
|
|
|
def test_help(orchestrator, basic_parser, capsys):
    """"--help" is parsed as a flag; show_help() then prints the help and exits 0."""
    parsed = basic_parser.parse_args(["--help"])
    assert parsed.help is True

    # test the show_help() on orchestrator
    with pytest.raises(SystemExit) as exit_error:
        orchestrator.show_help(parsed)
    assert exit_error.value.code == 0

    help_text = capsys.readouterr().out
    assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in help_text

    # Checked in the same order as before, one option per category.
    expected_options = (
        "--version",  # basic config options
        "--feeders",  # setting modules options
        "--extractors",
        "--authentication",  # authentication options
        "--logging.level",  # logging options
        "--gsheet_feeder_db.sheet_id",  # individual module configs
    )
    for option in expected_options:
        assert option in help_text
|
|
|
|
|
|
def test_add_custom_modules_path(orchestrator, test_args):
    """After setup_config(), the custom module directory must be on the modules path."""
    orchestrator.setup_config(test_args)

    # Imported here, after setup_config() has run, as in the original test.
    import auto_archiver

    module_search_paths = auto_archiver.modules.__path__
    assert "tests/data/test_modules/" in module_search_paths
|
|
|
|
|
|
def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):
    """A non-existent module path must be reported in the first log record."""
    # we still need to load the real path to get the example_module
    invalid_path_option = ["--module_paths", "tests/data/invalid_test_modules/"]
    orchestrator.setup_config(test_args + invalid_path_option)

    first_record = caplog.records[0]
    assert first_record.message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."
|
|
|
|
|
|
def test_check_required_values(orchestrator, caplog, test_args):
    """Leaving out a module's required field must exit and log the missing argument."""
    # drop the example_module.required_field from the test_args
    args_without_required_field = test_args[:-2]

    with pytest.raises(SystemExit):
        orchestrator.setup_config(args_without_required_field)

    second_record = caplog.records[1]
    assert second_record.message == "the following arguments are required: --example_module.required_field"
|
|
|
|
|
|
def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
    """A required field given in the YAML config needs no command-line option."""
    # load the default example yaml and add the required field to it
    yaml_config = read_yaml(TEST_ORCHESTRATION)
    yaml_config["example_module"] = {"required_field": "some_value"}

    # write it to a temp file
    config_path = (tmp_path / "temp_config.yaml").as_posix()
    store_yaml(yaml_config, config_path)

    # run the orchestrator against the temp config only
    cli_args = ["--config", config_path, "--module_paths", TEST_MODULES]
    config = orchestrator.setup_config(cli_args)
    assert config is not None
|
|
|
|
|
|
def test_load_authentication_string(orchestrator, test_args):
    """A JSON string passed to "--authentication" must end up as a dict in the config."""
    auth_json = '{"facebook.com": {"username": "my_username", "password": "my_password"}}'
    config = orchestrator.setup_config(test_args + ["--authentication", auth_json])

    expected_auth = {"facebook.com": {"username": "my_username", "password": "my_password"}}
    assert config["authentication"] == expected_auth
|
|
|
|
|
|
def test_load_authentication_string_concat_site(orchestrator, test_args):
    """A comma-separated site key must be expanded into one entry per site."""
    auth_json = '{"x.com,twitter.com": {"api_key": "my_key"}}'
    config = orchestrator.setup_config(test_args + ["--authentication", auth_json])

    expected_auth = {"x.com": {"api_key": "my_key"}, "twitter.com": {"api_key": "my_key"}}
    assert config["authentication"] == expected_auth
|
|
|
|
|
|
def test_load_invalid_authentication_string(orchestrator, test_args):
    """Malformed JSON for "--authentication" must raise ArgumentTypeError."""
    malformed_auth_args = test_args + ["--authentication", "{''invalid_json"]
    with pytest.raises(ArgumentTypeError):
        orchestrator.setup_config(malformed_auth_args)
|
|
|
|
|
|
def test_load_authentication_invalid_dict(orchestrator, test_args):
    """Valid JSON that is a list rather than an object must raise ArgumentTypeError."""
    non_dict_auth_args = test_args + ["--authentication", "[true, false]"]
    with pytest.raises(ArgumentTypeError):
        orchestrator.setup_config(non_dict_auth_args)
|
|
|
|
|
|
def test_load_modules_from_commandline(orchestrator, test_args):
    """Selecting example_module for every step type must load exactly one step each."""
    # (command-line flag, orchestrator attribute holding the loaded steps)
    step_types = (
        ("--feeders", "feeders"),
        ("--extractors", "extractors"),
        ("--databases", "databases"),
        ("--enrichers", "enrichers"),
        ("--formatters", "formatters"),
    )

    args = list(test_args)  # copy, so the fixture's list is left untouched
    for flag, _ in step_types:
        args.extend([flag, "example_module"])

    orchestrator.setup(args)

    # All counts are checked before any name, matching the original order.
    for _, attribute in step_types:
        assert len(getattr(orchestrator, attribute)) == 1

    for _, attribute in step_types:
        assert getattr(orchestrator, attribute)[0].name == "example_module"
|
|
|
|
|
|
def test_load_settings_for_module_from_commandline(orchestrator, test_args):
    """Module options given as "--<module>.<option>" must reach that module's config."""
    feeder_selection = ["--feeders", "gsheet_feeder_db"]
    feeder_settings = [
        "--gsheet_feeder_db.sheet_id",
        "123",
        "--gsheet_feeder_db.service_account",
        "tests/data/test_service_account.json",
    ]

    orchestrator.setup(test_args + feeder_selection + feeder_settings)

    loaded_feeders = orchestrator.feeders
    assert len(loaded_feeders) == 1
    assert loaded_feeders[0].name == "gsheet_feeder_db"

    feeder_config = orchestrator.config["gsheet_feeder_db"]
    assert feeder_config["sheet_id"] == "123"
|
|
|
|
|
|
def test_multiple_orchestrator(test_args):
    """A failed setup of one orchestrator must not prevent a second one from working.

    The first orchestrator is expected to raise ValueError during setup; a
    second, independent orchestrator is then set up with example_module as
    its feeder and must feed exactly one item.
    """
    o1_args = test_args + [
        "--feeders",
        "gsheet_feeder_db",
        "--gsheet_feeder_db.service_account",
        "tests/data/test_service_account.json",
    ]
    o1 = ArchivingOrchestrator()

    with pytest.raises(ValueError):
        # this should fail because the gsheet_feeder_db requires a sheet_id / sheet
        o1.setup(o1_args)

    o2_args = test_args + ["--feeders", "example_module"]
    o2 = ArchivingOrchestrator()
    o2.setup(o2_args)

    assert o2.feeders[0].name == "example_module"

    # feed() is materialised into a list, so the annotation is a list of
    # Metadata (the original annotated it as a single Metadata).
    output: list[Metadata] = list(o2.feed())
    assert len(output) == 1
    assert output[0].get_url() == "https://example.com"
|
|
|
|
|
|
def test_wrong_step_type(test_args, caplog):
    """Requesting a module for a step type it does not support must raise SetupError."""
    # example_extractor is not a valid feeder!
    wrong_feeder_option = ["--feeders", "example_extractor"]
    args = test_args + wrong_feeder_option

    orchestrator = ArchivingOrchestrator()
    with pytest.raises(SetupError) as err:
        orchestrator.setup(args)

    error_text = str(err.value)
    assert "Module 'example_extractor' is not a feeder" in error_text
|
|
|
|
|
|
def test_load_failed_extractor_cleanup(test_args, mocker, caplog):
    """An exception raised during module setup must be logged together with a cleanup."""
    orchestrator = ArchivingOrchestrator()

    # hack to set up the paths so we can patch properly
    orchestrator.module_factory.setup_paths([TEST_MODULES])

    # patch ExampleExtractor.setup to throw an exception
    setup_target = "auto_archiver.modules.example_extractor.example_extractor.ExampleExtractor.setup"
    mocker.patch(setup_target, side_effect=Exception("Test exception"))

    failing_args = test_args + ["--extractors", "example_extractor"]
    with pytest.raises(Exception):
        orchestrator.setup(failing_args)

    captured_log = caplog.text
    assert "Error during setup of modules: Test exception" in captured_log
    # make sure the 'cleanup' is called
    assert "cleanup" in captured_log
|