auto-archiver/tests/databases/test_gsheet_db.py

139 wiersze
5.4 KiB
Python
Czysty Zwykły widok Historia

2025-02-05 16:42:58 +00:00
from datetime import datetime, timezone
import pytest
from auto_archiver.core import Metadata, Media
from auto_archiver.modules.gsheet_db import GsheetsDb
from auto_archiver.modules.gsheet_feeder import GWorksheet
@pytest.fixture
def mock_gworksheet(mocker):
    """Provide a ``MagicMock`` spec'd to ``GWorksheet`` with canned answers.

    The stub reports that every column exists, that every cell is an empty
    string, and that every row is an empty dict.
    """
    worksheet = mocker.MagicMock(spec=GWorksheet)
    # Dotted keys configure the return values of the child mocks in one call.
    worksheet.configure_mock(
        **{
            "col_exists.return_value": True,
            "get_cell.return_value": "",
            "get_row.return_value": {},
        }
    )
    return worksheet
@pytest.fixture
def mock_metadata(mocker):
    """Provide a ``MagicMock`` spec'd to ``Metadata`` with fixed return values.

    ``status`` is set to ``"done"``; the getter methods listed below return
    the canned values, and the final media is itself a ``Media``-spec'd mock.
    """
    stub: Metadata = mocker.MagicMock(spec=Metadata)
    stub.status = "done"

    # Method name -> value that the mocked method returns.
    canned_returns = {
        "get_url": "http://example.com",
        "get_title": "Example Title",
        "get": "Example Content",
        "get_timestamp": "2025-01-01T00:00:00",
        "get_final_media": mocker.MagicMock(spec=Media),
        "get_all_media": [],
        "get_media_by_id": None,
        "get_first_image": None,
    }
    for method_name, value in canned_returns.items():
        getattr(stub, method_name).return_value = value
    return stub
@pytest.fixture
def metadata():
    """Provide a real ``Metadata`` populated with two media and basic fields.

    The media carry the ids ``"screenshot"`` and ``"browsertrix"``; the item
    is then given a URL, title, content, a success mark for ``"my-archiver"``,
    and ``timestamp`` / ``date`` values.
    """
    item = Metadata()

    # (filename, url, id) for each media attached to the item, in order.
    media_specs = (
        ("screenshot.png", "http://example.com/screenshot.png", "screenshot"),
        ("browsertrix", "http://example.com/browsertrix.wacz", "browsertrix"),
    )
    for filename, url, media_id in media_specs:
        item.add_media(Media(filename=filename, urls=[url]).set("id", media_id))

    item.set_url("http://example.com")
    item.set_title("Example Title")
    item.set_content("Example Content")
    item.success("my-archiver")

    extra_fields = (
        ("timestamp", "2025-01-01T00:00:00"),
        ("date", "2025-02-04T18:22:24.909112+00:00"),
    )
    for key, value in extra_fields:
        item.set(key, value)
    return item
@pytest.fixture
def mock_media(mocker):
    """Provide a ``Media``-spec'd mock with one URL and a fixed ``get`` result."""
    media_stub = mocker.MagicMock(spec=Media)
    media_stub.get.return_value = "not-calculated"
    media_stub.urls = ["http://example.com/media"]
    return media_stub
@pytest.fixture
def gsheets_db(mock_gworksheet, setup_module, mocker) -> GsheetsDb:
    """Build the ``gsheet_db`` module with its worksheet lookup stubbed out.

    The module is created through the ``setup_module`` fixture (not defined
    in this file). ``_retrieve_gsheet`` is then replaced by a mock that always
    returns ``(mock_gworksheet, 1)``, so tests never reach a real worksheet.
    """
    module_config = {
        "allow_worksheets": "set()",
        "block_worksheets": "set()",
        "use_sheet_names_in_stored_paths": "True",
    }
    database = setup_module("gsheet_db", module_config)

    stubbed_lookup = mocker.MagicMock(return_value=(mock_gworksheet, 1))
    database._retrieve_gsheet = stubbed_lookup
    return database
@pytest.fixture
def fixed_timestamp():
    """Provide a constant timezone-aware datetime: 2025-01-01 00:00 UTC."""
    return datetime(year=2025, month=1, day=1, tzinfo=timezone.utc)
@pytest.fixture
def expected_calls(mock_media, fixed_timestamp):
    """Provide the list of ``(row, column, value)`` cell updates a test expects.

    ``mock_media`` and ``fixed_timestamp`` are requested as fixtures but their
    values are not used in the body.
    """
    row = 1
    # Column name -> value, in the exact order the updates are expected.
    cells = (
        ('status', 'my-archiver: success'),
        ('archive', 'http://example.com/screenshot.png'),
        ('date', '2025-02-01T00:00:00+00:00'),
        ('title', 'Example Title'),
        ('text', 'Example Content'),
        ('timestamp', '2025-01-01T00:00:00+00:00'),
        ('hash', 'not-calculated'),
        ('screenshot', 'http://example.com/screenshot.png'),
        ('thumbnail', '=IMAGE("http://example.com/screenshot.png")'),
        ('wacz', 'http://example.com/browsertrix.wacz'),
        (
            'replaywebpage',
            'https://replayweb.page/?source=http%3A//example.com/browsertrix.wacz#view=pages&url=http%3A//example.com',
        ),
    )
    return [(row, column, value) for column, value in cells]
def test_retrieve_gsheet(gsheets_db, metadata, mock_gworksheet):
    """``_retrieve_gsheet`` yields the mocked worksheet and row 1.

    The ``gsheets_db`` fixture replaces ``_retrieve_gsheet`` with a mock, so
    this checks the fixture wiring rather than a real worksheet lookup.
    """
    worksheet, row_index = gsheets_db._retrieve_gsheet(metadata)
    assert row_index == 1
    assert worksheet == mock_gworksheet
def test_started(gsheets_db, mock_metadata, mock_gworksheet):
    """``started`` writes 'Archive in progress' to the status cell of row 1."""
    expected_cell = (1, 'status', 'Archive in progress')
    gsheets_db.started(mock_metadata)
    mock_gworksheet.set_cell.assert_called_once_with(*expected_cell)
def test_failed(gsheets_db, mock_metadata, mock_gworksheet):
    """``failed`` writes 'Archive failed <reason>' to the status cell of row 1."""
    reason = "Test failure"
    expected_cell = (1, 'status', f'Archive failed {reason}')
    gsheets_db.failed(mock_metadata, reason)
    mock_gworksheet.set_cell.assert_called_once_with(*expected_cell)
2025-02-05 16:42:58 +00:00
def test_aborted(gsheets_db, mock_metadata, mock_gworksheet):
    """``aborted`` resets the status cell of row 1 to an empty string."""
    expected_cell = (1, 'status', '')
    gsheets_db.aborted(mock_metadata)
    mock_gworksheet.set_cell.assert_called_once_with(*expected_cell)
2025-02-18 23:32:03 +00:00
def test_done(gsheets_db, metadata, mock_gworksheet, expected_calls, mocker):
    """``done`` sends exactly the expected cell updates in one batch call."""
    # Freeze the timestamp helper so the expected 'date' value is deterministic.
    timestamp_target = "auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp"
    mocker.patch(timestamp_target, return_value='2025-02-01T00:00:00+00:00')

    gsheets_db.done(metadata)

    batch_writer = mock_gworksheet.batch_set_cell
    batch_writer.assert_called_once_with(expected_calls)
2025-02-18 23:32:03 +00:00
def test_done_cached(gsheets_db, metadata, mock_gworksheet, mocker):
    """``done(..., cached=True)`` writes a value prefixed with "[cached]"."""
    # Freeze the timestamp helper so the run does not depend on the clock.
    mocker.patch(
        "auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp",
        return_value='2025-02-01T00:00:00+00:00',
    )
    gsheets_db.done(metadata, cached=True)

    # ``call_args`` is None when the mock was never called; assert the call
    # first so a missing batch write fails clearly instead of with TypeError.
    mock_gworksheet.batch_set_cell.assert_called()

    # Verify at least one batched value starts with "[cached]".
    call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
    assert any(call[2].startswith("[cached]") for call in call_args)
2025-02-18 23:32:03 +00:00
def test_done_missing_media(gsheets_db, metadata, mock_gworksheet, mocker):
    """``done`` writes no media-related columns when the item has no media."""
    # Clear media from metadata.
    metadata.media = []
    # Freeze the timestamp helper so the run does not depend on the clock.
    mocker.patch(
        "auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp",
        return_value='2025-02-01T00:00:00+00:00',
    )
    gsheets_db.done(metadata)

    # ``call_args`` is None when the mock was never called; assert the call
    # first so a missing batch write fails clearly instead of with TypeError.
    mock_gworksheet.batch_set_cell.assert_called()

    # Verify nothing media-related gets updated.
    call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
    media_fields = {'archive', 'screenshot', 'thumbnail', 'wacz', 'replaywebpage'}
    assert all(call[1] not in media_fields for call in call_args)
def test_safe_status_update(gsheets_db, metadata, mock_gworksheet):
    """``_safe_status_update`` writes the given text to the status cell of row 1."""
    expected_cell = (1, 'status', 'Test status')
    gsheets_db._safe_status_update(metadata, "Test status")
    mock_gworksheet.set_cell.assert_called_once_with(*expected_cell)