# auto-archiver/tests/test_orchestrator.py

import pytest
from argparse import ArgumentParser, ArgumentTypeError
from auto_archiver.core.orchestrator import ArchivingOrchestrator
from auto_archiver.version import __version__
from auto_archiver.core.config import read_yaml, store_yaml
from auto_archiver.core import Metadata
from auto_archiver.core.consts import SetupError

# Orchestration YAML file handed to the parser through "--config" in these tests.
TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
# Extra module directory handed to the parser through "--module_paths";
# the tests rely on it to make "example_module" loadable.
TEST_MODULES = "tests/data/test_modules/"


@pytest.fixture
def test_args():
    """Return the baseline command-line arguments shared by the tests.

    The list ends with the ``--example_module.required_field`` flag and its
    value, so a test that needs the field to be missing can drop the last two
    items.
    """
    config_args = ["--config", TEST_ORCHESTRATION]
    module_path_args = ["--module_paths", TEST_MODULES]
    # just set this for normal testing, we will remove it later
    required_field_args = ["--example_module.required_field", "some_value"]
    return config_args + module_path_args + required_field_args


@pytest.fixture
def orchestrator():
    """Provide a newly constructed ``ArchivingOrchestrator``."""
    instance = ArchivingOrchestrator()
    return instance


@pytest.fixture
def basic_parser(orchestrator) -> ArgumentParser:
    """Provide the parser returned by ``orchestrator.setup_basic_parser()``."""
    parser = orchestrator.setup_basic_parser()
    return parser


def test_setup_orchestrator(orchestrator):
    """The ``orchestrator`` fixture must hand over an object, not ``None``."""
    is_missing = orchestrator is None
    assert not is_missing


def test_parse_config():
    """Placeholder test: it makes no assertions and therefore always passes."""
    return None


def test_parse_basic(basic_parser):
    """The value given to ``--config`` is exposed as ``config_file``."""
    command_line = ["--config", TEST_ORCHESTRATION]
    parsed = basic_parser.parse_args(command_line)
    assert parsed.config_file == TEST_ORCHESTRATION


@pytest.mark.parametrize("mode", ["simple", "full"])
def test_mode(basic_parser, mode):
    """Each parametrized ``--mode`` value is stored unchanged on the namespace."""
    command_line = ["--mode", mode]
    parsed = basic_parser.parse_args(command_line)
    assert parsed.mode == mode


def test_mode_invalid(basic_parser, capsys):
    """An unrecognised ``--mode`` value exits with status 2 and an error on stderr."""
    bad_command_line = ["--mode", "invalid"]
    with pytest.raises(SystemExit) as exit_error:
        basic_parser.parse_args(bad_command_line)

    stderr_text = capsys.readouterr().err
    assert exit_error.value.code == 2
    assert "invalid choice" in stderr_text


def test_version(basic_parser, capsys):
    """``--version`` prints the package version on stdout and exits with 0."""
    with pytest.raises(SystemExit) as exit_error:
        basic_parser.parse_args(["--version"])

    stdout_text = capsys.readouterr().out
    assert exit_error.value.code == 0
    assert stdout_text == f"{__version__}\n"


def test_help(orchestrator, basic_parser, capsys):
    """``--help`` sets the flag, and ``show_help`` prints the usage then exits 0.

    The printed help is expected to contain the usage line plus one
    representative option from each option group.
    """
    args = basic_parser.parse_args(["--help"])
    assert args.help is True

    # test the show_help() on orchestrator
    with pytest.raises(SystemExit) as exit_error:
        orchestrator.show_help(args)
    assert exit_error.value.code == 0

    logs = capsys.readouterr().out
    assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in logs

    expected_options = (
        "--version",  # basic config options
        "--feeders",  # setting modules options
        "--extractors",
        "--authentication",  # authentication options
        "--logging.level",  # logging options
        "--gsheet_feeder_db.sheet_id",  # individual module configs
    )
    for option in expected_options:
        assert option in logs


def test_add_custom_modules_path(orchestrator, test_args):
    """After ``setup_config`` the custom path is listed in ``auto_archiver.modules.__path__``."""
    import auto_archiver

    orchestrator.setup_config(test_args)

    module_search_paths = auto_archiver.modules.__path__
    assert "tests/data/test_modules/" in module_search_paths


def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):
    """A non-existent ``--module_paths`` entry is logged as skipped."""
    # we still need to load the real path (in test_args) to get the example_module
    invalid_path_args = ["--module_paths", "tests/data/invalid_test_modules/"]
    orchestrator.setup_config(test_args + invalid_path_args)

    first_record = caplog.records[0]
    assert first_record.message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."


def test_check_required_values(orchestrator, caplog, test_args):
    """Omitting a required module field makes ``setup_config`` exit and log the reason."""
    # drop the example_module.required_field flag and its value from the test_args
    args_without_required = test_args[:-2]

    with pytest.raises(SystemExit):
        orchestrator.setup_config(args_without_required)

    second_record = caplog.records[1]
    assert second_record.message == "the following arguments are required: --example_module.required_field"


def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
    """A required field supplied in the YAML file needs no command-line flag."""
    # start from the default example yaml and add the required field to it
    config_data = read_yaml(TEST_ORCHESTRATION)
    config_data["example_module"] = {"required_field": "some_value"}

    # persist the modified config to a temporary file
    config_path = (tmp_path / "temp_config.yaml").as_posix()
    store_yaml(config_data, config_path)

    # run the orchestrator against the temporary config
    command_line = ["--config", config_path, "--module_paths", TEST_MODULES]
    config = orchestrator.setup_config(command_line)
    assert config is not None


def test_load_authentication_string(orchestrator, test_args):
    """A JSON string passed to ``--authentication`` is loaded as a dict."""
    auth_json = '{"facebook.com": {"username": "my_username", "password": "my_password"}}'
    config = orchestrator.setup_config(test_args + ["--authentication", auth_json])

    expected_auth = {"facebook.com": {"username": "my_username", "password": "my_password"}}
    assert config["authentication"] == expected_auth


def test_load_authentication_string_concat_site(orchestrator, test_args):
    """A comma-separated site key is expanded into one entry per site."""
    auth_json = '{"x.com,twitter.com": {"api_key": "my_key"}}'
    config = orchestrator.setup_config(test_args + ["--authentication", auth_json])

    expected_auth = {"x.com": {"api_key": "my_key"}, "twitter.com": {"api_key": "my_key"}}
    assert config["authentication"] == expected_auth


def test_load_invalid_authentication_string(orchestrator, test_args):
    """Malformed JSON given to ``--authentication`` raises ``ArgumentTypeError``."""
    malformed_args = test_args + ["--authentication", "{''invalid_json"]
    with pytest.raises(ArgumentTypeError):
        orchestrator.setup_config(malformed_args)


def test_load_authentication_invalid_dict(orchestrator, test_args):
    """Valid JSON that is a list instead of an object raises ``ArgumentTypeError``."""
    non_dict_args = test_args + ["--authentication", "[true, false]"]
    with pytest.raises(ArgumentTypeError):
        orchestrator.setup_config(non_dict_args)


def test_load_modules_from_commandline(orchestrator, test_args):
    """Each step flag on the command line loads exactly one ``example_module``."""
    # command-line flag -> orchestrator attribute holding the loaded modules
    steps = (
        ("--feeders", "feeders"),
        ("--extractors", "extractors"),
        ("--databases", "databases"),
        ("--enrichers", "enrichers"),
        ("--formatters", "formatters"),
    )

    args = list(test_args)
    for flag, _ in steps:
        args += [flag, "example_module"]

    orchestrator.setup(args)

    # all length checks run before any name check
    for _, attribute in steps:
        assert len(getattr(orchestrator, attribute)) == 1

    for _, attribute in steps:
        assert getattr(orchestrator, attribute)[0].name == "example_module"


def test_load_settings_for_module_from_commandline(orchestrator, test_args):
    """Dotted ``--<module>.<option>`` flags end up in that module's config section."""
    feeder_args = ["--feeders", "gsheet_feeder_db"]
    sheet_args = ["--gsheet_feeder_db.sheet_id", "123"]
    account_args = ["--gsheet_feeder_db.service_account", "tests/data/test_service_account.json"]

    orchestrator.setup(test_args + feeder_args + sheet_args + account_args)

    loaded_feeders = orchestrator.feeders
    assert len(loaded_feeders) == 1
    assert loaded_feeders[0].name == "gsheet_feeder_db"
    assert orchestrator.config["gsheet_feeder_db"]["sheet_id"] == "123"


def test_multiple_orchestrator(test_args):
    """A second orchestrator can be set up and fed after a first one failed setup.

    The first orchestrator is configured with ``gsheet_feeder_db`` but without
    a sheet, so its ``setup`` is expected to raise ``ValueError``. A second,
    independent orchestrator using ``example_module`` as feeder must then set
    up normally and feed exactly one item.
    """
    o1_args = test_args + [
        "--feeders",
        "gsheet_feeder_db",
        "--gsheet_feeder_db.service_account",
        "tests/data/test_service_account.json",
    ]
    o1 = ArchivingOrchestrator()

    with pytest.raises(ValueError):
        # this should fail because the gsheet_feeder_db requires a sheet_id / sheet
        o1.setup(o1_args)

    o2_args = test_args + ["--feeders", "example_module"]
    o2 = ArchivingOrchestrator()
    o2.setup(o2_args)

    assert o2.feeders[0].name == "example_module"

    # feed() is materialised into a list, so the annotation is a list of
    # Metadata rather than a single Metadata instance.
    output: list[Metadata] = list(o2.feed())
    assert len(output) == 1
    assert output[0].get_url() == "https://example.com"


def test_wrong_step_type(test_args, caplog):
    """Listing a non-feeder module under ``--feeders`` raises ``SetupError``.

    The error message is expected to name the offending module and the step
    it was wrongly configured for.
    """
    args = test_args + [
        "--feeders",
        "example_extractor",  # example_extractor is not a valid feeder!
    ]

    orchestrator = ArchivingOrchestrator()
    with pytest.raises(SetupError) as err:
        orchestrator.setup(args)

    # This check must live outside the ``with`` block: once ``setup`` raises,
    # the remaining statements inside ``pytest.raises`` are skipped, so an
    # assertion placed there would never run.
    assert "Module 'example_extractor' is not a feeder" in str(err.value)


def test_load_failed_extractor_cleanup(test_args, mocker, caplog):
    """An exception raised by a module's ``setup`` is logged, along with a cleanup entry."""
    orchestrator = ArchivingOrchestrator()

    # hack to set up the paths so we can patch properly
    orchestrator.module_factory.setup_paths([TEST_MODULES])

    # patch ExampleExtractor.setup to throw an exception
    setup_target = "auto_archiver.modules.example_extractor.example_extractor.ExampleExtractor.setup"
    mocker.patch(setup_target, side_effect=Exception("Test exception"))

    failing_args = test_args + ["--extractors", "example_extractor"]
    with pytest.raises(Exception):
        orchestrator.setup(failing_args)

    captured_log = caplog.text
    assert "Error during setup of modules: Test exception" in captured_log
    # make sure the 'cleanup' is called
    assert "cleanup" in captured_log