# Mirror of https://github.com/bellingcat/auto-archiver (217 lines, 8.3 KiB, Python)
import base64
|
|
|
|
import pytest
|
|
from selenium.common.exceptions import TimeoutException
|
|
|
|
from auto_archiver.core import Metadata, Media
|
|
from auto_archiver.modules.screenshot_enricher import ScreenshotEnricher
|
|
|
|
|
|
@pytest.fixture
def mock_selenium_env(mocker):
    """Stub out Selenium plus the process and filesystem calls around it.

    Yields:
        A ``(driver, driver_class, options)`` tuple: the ``MagicMock`` returned
        by the patched ``CookieSettingDriver``, the patched class itself, and
        the ``MagicMock`` returned by the patched ``FirefoxOptions``.
    """
    # Only "geckodriver" resolves to a path; every other lookup yields None.
    mocker.patch(
        "shutil.which",
        side_effect=lambda dep: "/mock/geckodriver" if dep == "geckodriver" else None,
    )

    # The driver class hands back one shared MagicMock instance.
    driver = mocker.MagicMock()
    driver_cls = mocker.patch(
        "auto_archiver.utils.webdriver.CookieSettingDriver",
        return_value=driver,
    )

    mocker.patch(
        "selenium.webdriver.common.selenium_manager.SeleniumManager.binary_paths",
        return_value={
            "driver_path": "/mock/driver",
            "browser_path": "/mock/browser",
        },
    )
    mocker.patch("pathlib.Path.is_file", return_value=True)

    # The fake child process reports "still running" (poll() -> None).
    process = mocker.MagicMock()
    process.poll.return_value = None
    mocker.patch("subprocess.Popen", return_value=process)

    mocker.patch("selenium.webdriver.common.service.Service.is_connectable", return_value=True)

    options = mocker.MagicMock()
    mocker.patch("selenium.webdriver.FirefoxOptions", return_value=options)

    yield driver, driver_cls, options
|
|
|
|
|
|
@pytest.fixture
def common_patches(tmp_path, mocker):
    """Install the patches shared by the full-path enrich tests.

    While active, ``is_auth_wall`` always returns ``False``, every
    ``os.path.join`` call returns ``<tmp_path>/test.png``, and ``time.sleep``
    is replaced by a mock.
    """
    # Resolve the fake output path before os.path.join itself gets replaced.
    screenshot_path = str(tmp_path / "test.png")

    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=False)
    mocker.patch("os.path.join", return_value=screenshot_path)
    mocker.patch("time.sleep")
    yield
|
|
|
|
|
|
@pytest.fixture
def screenshot_enricher(setup_module, mock_binary_dependencies) -> ScreenshotEnricher:
    """Build a ``ScreenshotEnricher`` through ``setup_module`` with a fixed test config.

    ``setup_module`` and ``mock_binary_dependencies`` are fixtures that are not
    defined in this file (presumably provided by a conftest).
    """
    configs: dict = {
        "width": 1280,
        "height": 720,
        "timeout": 60,
        "sleep_before_screenshot": 4,
        "http_proxy": "",
        # A real boolean: the string "False" is non-empty and therefore truthy,
        # which would enable PDF output unless the config loader parses strings.
        # test_pdf_creation flips this attribute to True explicitly.
        "save_to_pdf": False,
        "print_options": {},
    }
    return setup_module("screenshot_enricher", configs)
|
|
|
|
|
|
@pytest.fixture
def metadata_with_video():
    """Return a ``Metadata`` for https://example.com holding one media item with id ``video1``."""
    video = Media(filename="video.mp4").set("id", "video1")

    metadata = Metadata()
    metadata.set_url("https://example.com")
    metadata.add_media(video)
    return metadata
|
|
|
|
|
|
def test_enrich_adds_screenshot(
    screenshot_enricher,
    metadata_with_video,
    mock_selenium_env,
    common_patches,
    tmp_path,
):
    """Enriching a plain URL drives the mocked browser and appends a screenshot media."""
    driver, driver_cls, options = mock_selenium_env

    screenshot_enricher.enrich(metadata_with_video)

    # The driver must be constructed exactly once, without any cookie settings.
    expected_kwargs = {
        "cookie": None,
        "cookie_jar": None,
        "facebook_accept_cookies": False,
        "options": options,
    }
    driver_cls.assert_called_once_with(**expected_kwargs)

    # The page is loaded and saved to the path forced by common_patches.
    driver.get.assert_called_once_with("https://example.com")
    driver.save_screenshot.assert_called_once_with(str(tmp_path / "test.png"))

    # Original video plus the new screenshot.
    media = metadata_with_video.media
    assert len(media) == 2
    assert media[1].properties.get("id") == "screenshot"
|
|
|
|
|
|
@pytest.mark.parametrize(
    "url,is_auth",
    [
        ("https://example.com", False),
        ("https://private.com", True),
    ],
)
def test_enrich_auth_wall(
    screenshot_enricher, metadata_with_video, mock_selenium_env, common_patches, url, is_auth, mocker
):
    """Check enrich with ``is_auth_wall`` forced to each parametrized value.

    The patch applied here is installed after ``common_patches``, so it
    replaces that fixture's ``is_auth_wall`` stub.
    """
    driver = mock_selenium_env[0]
    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=is_auth)

    metadata_with_video.set_url(url)
    screenshot_enricher.enrich(metadata_with_video)

    media = metadata_with_video.media
    if not is_auth:
        # Open page: it is loaded and a screenshot follows the original video.
        driver.get.assert_called_once_with(url)
        assert len(media) == 2
        assert media[1].properties.get("id") == "screenshot"
        return

    # Auth-walled page: the browser is never pointed at it and nothing is added.
    driver.get.assert_not_called()
    assert len(media) == 1
    assert media[0].properties.get("id") == "video1"
|
|
|
|
|
|
def test_skip_authwall_no_cookies(screenshot_enricher, caplog):
    """Enriching an instagram.com URL with the unmodified enricher fixture logs the skip warning."""
    target = Metadata().set_url("https://instagram.com")
    with caplog.at_level("WARNING"):
        screenshot_enricher.enrich(target)
        assert "[SKIP] SCREENSHOT since url" in caplog.text
|
|
|
|
|
|
@pytest.mark.parametrize(
    "auth",
    [
        {"cookie": "cookie"},
        {"cookies_jar": "cookie"},
    ],
)
def test_dont_skip_authwall_with_cookies(screenshot_enricher, caplog, mocker, mock_selenium_env, auth):
    """With cookie-type auth configured, an auth-walled URL must not produce the skip warning."""
    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=True)
    # Stubbed as in common_patches: the enricher is configured with
    # sleep_before_screenshot=4 and presumably calls time.sleep on this path,
    # which would otherwise make the test wait for real.
    mocker.patch("time.sleep")

    # Replace the authentication dict with the parametrized cookie settings.
    screenshot_enricher.authentication = {"example.com": auth}
    with caplog.at_level("WARNING"):
        screenshot_enricher.enrich(Metadata().set_url("https://example.com"))
        assert "[SKIP] SCREENSHOT since url" not in caplog.text
|
|
|
|
|
|
def test_show_warning_wrong_auth_type(screenshot_enricher, caplog, mocker, mock_selenium_env):
    """Username/password auth on an auth-walled URL logs the cookie-only warning.

    ``mock_selenium_env`` is requested only for the patches it installs.
    """
    credentials = {"username": "user", "password": "pass"}
    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=True)
    screenshot_enricher.authentication = {"example.com": credentials}

    target = Metadata().set_url("https://example.com")
    with caplog.at_level("WARNING"):
        screenshot_enricher.enrich(target)
        assert "Screenshot enricher only supports cookie-type authentication" in caplog.text
|
|
|
|
|
|
def test_handle_timeout_exception(screenshot_enricher, metadata_with_video, mock_selenium_env, mocker):
    """A ``TimeoutException`` raised by ``driver.get`` is logged via ``logger.info`` and adds no media."""
    driver = mock_selenium_env[0]
    info_log = mocker.patch("loguru.logger.info")
    driver.get.side_effect = TimeoutException

    screenshot_enricher.enrich(metadata_with_video)

    info_log.assert_called_once_with("TimeoutException loading page for screenshot")
    # Only the original video remains.
    assert len(metadata_with_video.media) == 1
|
|
|
|
|
|
def test_handle_general_exception(screenshot_enricher, metadata_with_video, mock_selenium_env, mocker):
    """An unexpected exception from ``save_screenshot`` is logged via ``logger.error`` and adds no media."""
    mock_driver, mock_driver_class, mock_options_instance = mock_selenium_env
    # Page load succeeds; the failure is injected at save_screenshot.
    mock_driver.get.return_value = None
    mock_driver.save_screenshot.side_effect = Exception("Unexpected Error")

    # Stubbed as in common_patches: driver.get succeeds here, so the enricher
    # (configured with sleep_before_screenshot=4) presumably reaches its
    # time.sleep call, which would otherwise make the test wait for real.
    mocker.patch("time.sleep")

    mock_log = mocker.patch("loguru.logger.error")
    screenshot_enricher.enrich(metadata_with_video)

    # The exception must be reported exactly once through logger.error.
    mock_log.assert_called_once_with("Got error while loading webdriver for screenshot enricher: Unexpected Error")
    # And no new media was added due to the error.
    assert len(metadata_with_video.media) == 1
|
|
|
|
|
|
def test_pdf_creation(mocker, screenshot_enricher, metadata_with_video, mock_selenium_env):
    """With ``save_to_pdf`` enabled, enrich adds a screenshot media and a PDF media."""
    mock_driver, mock_driver_class, mock_options_instance = mock_selenium_env
    # Override the save_to_pdf option
    screenshot_enricher.save_to_pdf = True
    # print_page returns base64-encoded content
    mock_driver.print_page.return_value = base64.b64encode(b"fake_pdf_content").decode("utf-8")

    # Stubbed as in common_patches: the enricher is configured with
    # sleep_before_screenshot=4 and presumably calls time.sleep on this path,
    # which would otherwise make the test wait for real.
    mocker.patch("time.sleep")
    # os.path.join collapses to its last argument so file names are predictable.
    mocker.patch("os.path.join", side_effect=lambda *args: f"{args[-1]}")
    mocker.patch(
        "auto_archiver.modules.screenshot_enricher.screenshot_enricher.random_str",
        return_value="fixed123",
    )
    mock_open = mocker.patch("builtins.open", new_callable=mocker.mock_open)

    screenshot_enricher.enrich(metadata_with_video)

    # Verify screenshot and PDF creation
    mock_driver.save_screenshot.assert_called_once()
    mock_driver.print_page.assert_called_once_with(mock_driver.print_options)
    # Check that the PDF file was opened for binary writing
    mock_open.assert_any_call("pdf_fixed123.pdf", "wb")

    # Ensure both screenshot and PDF were added as media after the original video
    assert len(metadata_with_video.media) == 3
    assert metadata_with_video.media[1].properties.get("id") == "screenshot"
    assert metadata_with_video.media[2].properties.get("id") == "pdf"
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def cleanup_files(tmp_path):
    """Empty ``tmp_path`` once each test has finished.

    Files and symlinks are unlinked; subdirectories are removed recursively.
    """
    import shutil  # function-scope import keeps this block self-contained

    yield

    # Snapshot the listing first: pathlib leaves it unspecified whether entries
    # removed while the iterator is live are still yielded.
    for entry in list(tmp_path.iterdir()):
        if entry.is_dir() and not entry.is_symlink():
            # unlink() raises on directories, so remove them as a tree.
            shutil.rmtree(entry, ignore_errors=True)
        else:
            # missing_ok: the entry may already have been removed by the test.
            entry.unlink(missing_ok=True)
|