# auto-archiver/tests/test_orchestrator.py

import pytest
import sys
from argparse import ArgumentParser, ArgumentTypeError
from auto_archiver.core.orchestrator import ArchivingOrchestrator
from auto_archiver.version import __version__
from auto_archiver.core.config import read_yaml, store_yaml
from auto_archiver.core import Metadata

# YAML file handed to the parser through `--config` in these tests.
# NOTE(review): the path is relative, so the tests presumably need to run from the repository root - confirm.
TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
# Directory handed to the parser through `--module_paths`; the tests load `example_module` from it.
TEST_MODULES = "tests/data/test_modules/"

@pytest.fixture
def test_args():
    """Baseline command-line arguments shared by the config-building tests.

    The trailing ``--example_module.required_field`` pair is only there so
    that ordinary tests have the required value available; the test that
    exercises the missing-value path strips those last two items.
    """
    config_option = ["--config", TEST_ORCHESTRATION]
    module_paths_option = ["--module_paths", TEST_MODULES]
    required_field_option = ["--example_module.required_field", "some_value"]
    return config_option + module_paths_option + required_field_option

@pytest.fixture
def orchestrator():
    """Provide a newly constructed ``ArchivingOrchestrator``."""
    fresh_orchestrator = ArchivingOrchestrator()
    return fresh_orchestrator

@pytest.fixture
def basic_parser(orchestrator) -> ArgumentParser:
    """Return the parser produced by ``orchestrator.setup_basic_parser()``."""
    parser = orchestrator.setup_basic_parser()
    return parser

def test_setup_orchestrator(orchestrator):
    """The ``orchestrator`` fixture must hand over an object, not ``None``."""
    fixture_is_missing = orchestrator is None
    assert not fixture_is_missing

def test_parse_config():
    """Placeholder: this test currently performs no checks."""

def test_parse_basic(basic_parser):
    """The value of ``--config`` is stored on the namespace as ``config_file``."""
    cli = ["--config", TEST_ORCHESTRATION]
    parsed = basic_parser.parse_args(cli)
    assert parsed.config_file == TEST_ORCHESTRATION

@pytest.mark.parametrize("mode", ["simple", "full"])
def test_mode(basic_parser, mode):
    """Each parametrized ``--mode`` value is stored unchanged on the namespace."""
    cli = ["--mode", mode]
    namespace = basic_parser.parse_args(cli)
    assert namespace.mode == mode

def test_mode_invalid(basic_parser, capsys):
    """An unknown ``--mode`` value exits with status 2 and reports the bad choice on stderr."""
    rejected_cli = ["--mode", "invalid"]
    with pytest.raises(SystemExit) as raised:
        basic_parser.parse_args(rejected_cli)
    assert raised.value.code == 2

    stderr_text = capsys.readouterr().err
    assert "invalid choice" in stderr_text

def test_version(basic_parser, capsys):
    """``--version`` exits with status 0 after printing the package version to stdout."""
    with pytest.raises(SystemExit) as raised:
        basic_parser.parse_args(["--version"])
    assert raised.value.code == 0

    printed = capsys.readouterr().out
    assert printed == f"{__version__}\n"

def test_help(orchestrator, basic_parser, capsys):
    """``--help`` is recorded as a flag and ``show_help`` prints the full usage text.

    Parsing ``--help`` with the basic parser returns a namespace instead of
    exiting; the help text is produced by ``orchestrator.show_help``, which is
    expected to exit with status 0 after writing to stdout.
    """
    args = basic_parser.parse_args(["--help"])
    # Identity check: the flag must be the boolean True, not merely equal to it.
    assert args.help is True

    # test the show_help() on orchestrator
    with pytest.raises(SystemExit) as exit_error:
        orchestrator.show_help(args)

    assert exit_error.value.code == 0

    logs = capsys.readouterr().out
    assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in logs

    expected_options = (
        "--version",  # basic config options
        "--feeders",  # setting modules options
        "--extractors",
        "--authentication",  # authentication options
        "--logging.level",  # logging options
        "--gsheet_feeder.sheet_id",  # individual module configs
    )
    for option in expected_options:
        assert option in logs, f"{option} is missing from the help output"


def test_add_custom_modules_path(orchestrator, test_args):
    """After ``setup_config``, the ``--module_paths`` directory is on ``auto_archiver.modules.__path__``."""
    orchestrator.setup_config(test_args)

    # Imported here, after setup_config has run, to inspect the package's search path.
    import auto_archiver

    # Compare against the shared constant that test_args passes via --module_paths,
    # so the expectation cannot drift from the fixture.
    assert TEST_MODULES in auto_archiver.modules.__path__

def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):
    """A ``--module_paths`` entry that does not exist is reported in the first log record."""
    # test_args still carries the real module path, which is needed to get the example_module
    missing_path_option = ["--module_paths", "tests/data/invalid_test_modules/"]
    orchestrator.setup_config(test_args + missing_path_option)

    first_record = caplog.records[0]
    assert first_record.message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."


def test_check_required_values(orchestrator, caplog, test_args):
    """Leaving out a module's required option makes ``setup_config`` exit and log what is missing."""
    # drop the trailing "--example_module.required_field", "some_value" pair from test_args
    args_without_required = test_args[:-2]

    with pytest.raises(SystemExit):
        orchestrator.setup_config(args_without_required)

    # The second captured record is expected to carry the parser's error message.
    assert caplog.records[1].message == "the following arguments are required: --example_module.required_field"

def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
    """A required module option may be supplied by the YAML config instead of the command line."""
    # start from the default example yaml and inject the required field
    yaml_contents = read_yaml(TEST_ORCHESTRATION)
    yaml_contents['example_module'] = {'required_field': 'some_value'}

    # persist the modified config in pytest's temporary directory
    config_path = (tmp_path / "temp_config.yaml").as_posix()
    store_yaml(yaml_contents, config_path)

    # run the orchestrator without --example_module.required_field on the command line
    cli = ["--config", config_path, "--module_paths", TEST_MODULES]
    loaded_config = orchestrator.setup_config(cli)
    assert loaded_config is not None

def test_load_authentication_string(orchestrator, test_args):
    """A JSON string passed to ``--authentication`` ends up as a dict in the config."""
    auth_json = '{"facebook.com": {"username": "my_username", "password": "my_password"}}'
    expected_auth = {"facebook.com": {"username": "my_username", "password": "my_password"}}

    config = orchestrator.setup_config(test_args + ["--authentication", auth_json])
    assert config['authentication'] == expected_auth

def test_load_authentication_string_concat_site(orchestrator, test_args):
    """A comma-separated site key is split so each site gets its own copy of the credentials."""
    auth_json = '{"x.com,twitter.com": {"api_key": "my_key"}}'
    expected_auth = {
        "x.com": {"api_key": "my_key"},
        "twitter.com": {"api_key": "my_key"},
    }

    config = orchestrator.setup_config(test_args + ["--authentication", auth_json])
    assert config['authentication'] == expected_auth

def test_load_invalid_authentication_string(orchestrator, test_args):
    """A malformed ``--authentication`` string raises ``ArgumentTypeError``."""
    malformed_auth = "{\''invalid_json"
    cli = test_args + ["--authentication", malformed_auth]
    with pytest.raises(ArgumentTypeError):
        orchestrator.setup_config(cli)

def test_load_authentication_invalid_dict(orchestrator, test_args):
    """Valid JSON that is a list rather than an object raises ``ArgumentTypeError``."""
    json_list = "[true, false]"
    cli = test_args + ["--authentication", json_list]
    with pytest.raises(ArgumentTypeError):
        orchestrator.setup_config(cli)

def test_load_modules_from_commandline(orchestrator, test_args):
    """Selecting ``example_module`` for every module type loads exactly one module per type."""
    module_types = ("feeders", "extractors", "databases", "enrichers", "formatters")

    # Append "--<type> example_module" for each module type after the baseline args.
    args = list(test_args)
    for module_type in module_types:
        args += [f"--{module_type}", "example_module"]

    orchestrator.setup(args)

    loaded_by_type = [getattr(orchestrator, module_type) for module_type in module_types]

    # First every count, then every name, in the same order as the module types above.
    for loaded in loaded_by_type:
        assert len(loaded) == 1
    for loaded in loaded_by_type:
        assert loaded[0].name == "example_module"

def test_load_settings_for_module_from_commandline(orchestrator, test_args):
    """A dotted ``--gsheet_feeder.sheet_id`` flag is stored under the module's key in the config."""
    gsheet_options = [
        "--feeders", "gsheet_feeder",
        "--gsheet_feeder.sheet_id", "123",
        "--gsheet_feeder.service_account", "tests/data/test_service_account.json",
    ]

    orchestrator.setup(test_args + gsheet_options)

    assert len(orchestrator.feeders) == 1
    only_feeder = orchestrator.feeders[0]
    assert only_feeder.name == "gsheet_feeder"

    gsheet_config = orchestrator.config['gsheet_feeder']
    assert gsheet_config['sheet_id'] == "123"


def test_multiple_orchestrator(test_args):
    """Two orchestrators can be set up in the same process, one after the other.

    The first setup is expected to raise ``ValueError``; a second, separately
    constructed orchestrator must still set up and feed normally afterwards.
    """
    o1_args = test_args + ["--feeders", "gsheet_feeder", "--gsheet_feeder.service_account", "tests/data/test_service_account.json"]
    o1 = ArchivingOrchestrator()

    with pytest.raises(ValueError):
        # this should fail because the gsheet_feeder requires a sheet_id / sheet
        o1.setup(o1_args)

    o2_args = test_args + ["--feeders", "example_module"]
    o2 = ArchivingOrchestrator()
    o2.setup(o2_args)

    assert o2.feeders[0].name == "example_module"

    # Materialise feed() into a list so it can be measured and indexed.
    output: list[Metadata] = list(o2.feed())
    assert len(output) == 1
    assert output[0].get_url() == "https://example.com"