Add more tests.

pull/194/head
erinhmclark 2025-02-14 09:48:37 +00:00
rodzic 3fce593aad
commit 319c1e8f92
10 zmienionych plików z 102 dodań i 33 usunięć

30
poetry.lock wygenerowano
Wyświetl plik

@ -172,18 +172,18 @@ lxml = ["lxml"]
[[package]]
name = "boto3"
version = "1.36.17"
version = "1.36.19"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "boto3-1.36.17-py3-none-any.whl", hash = "sha256:59bcf0c4b04d9cc36f8b418ad17ab3c4a99a21a175d2fad7096aa21cbe84630b"},
{file = "boto3-1.36.17.tar.gz", hash = "sha256:5ecae20e780a3ce9afb3add532b61c466a8cb8960618e4fa565b3883064c1346"},
{file = "boto3-1.36.19-py3-none-any.whl", hash = "sha256:7784590369a9d545bb07b2de56b6ce4d5a5e232883a957f704c3f842caeba155"},
{file = "boto3-1.36.19.tar.gz", hash = "sha256:8c2c2a4ccdfe35dd2611ee1b7473dd2383948415c777e42dc4e7f1ebe371fe8c"},
]
[package.dependencies]
botocore = ">=1.36.17,<1.37.0"
botocore = ">=1.36.19,<1.37.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.11.0,<0.12.0"
@ -192,14 +192,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.36.17"
version = "1.36.19"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "botocore-1.36.17-py3-none-any.whl", hash = "sha256:069858b2fd693548035d7fd53a774e37e4260fea64e0ac9b8a3aee904f9321df"},
{file = "botocore-1.36.17.tar.gz", hash = "sha256:cec13e0a7ce78e71aad0b397581b4e81824c7981ef4c261d2e296d200c399b09"},
{file = "botocore-1.36.19-py3-none-any.whl", hash = "sha256:98882c106fec4c08678ea028199f7f5119550fab95d682b30846f7aae04b7bec"},
{file = "botocore-1.36.19.tar.gz", hash = "sha256:cdf6729f601f82b1acdb9004b1f88b57cfb470f576394cdb3bbf5150f7fafb5b"},
]
[package.dependencies]
@ -860,14 +860,14 @@ tool = ["click (>=6.0.0)"]
[[package]]
name = "googleapis-common-protos"
version = "1.66.0"
version = "1.67.0"
description = "Common protobufs used in Google APIs"
optional = false
python-versions = ">=3.7"
groups = ["main"]
files = [
{file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"},
{file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"},
{file = "googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741"},
{file = "googleapis_common_protos-1.67.0.tar.gz", hash = "sha256:21398025365f138be356d5923e9168737d94d46a72aefee4a6110a1f23463c86"},
]
[package.dependencies]
@ -1235,14 +1235,14 @@ files = [
[[package]]
name = "myst-parser"
version = "4.0.0"
version = "4.0.1"
description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser,"
optional = false
python-versions = ">=3.10"
groups = ["docs"]
files = [
{file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"},
{file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"},
{file = "myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"},
{file = "myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4"},
]
[package.dependencies]
@ -1254,10 +1254,10 @@ pyyaml = "*"
sphinx = ">=7,<9"
[package.extras]
code-style = ["pre-commit (>=3.0,<4.0)"]
code-style = ["pre-commit (>=4.0,<5.0)"]
linkify = ["linkify-it-py (>=2.0,<3.0)"]
rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"]
testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"]
[[package]]

Wyświetl plik

@ -29,8 +29,7 @@ class WhisperEnricher(Enricher):
job_results = {}
for i, m in enumerate(to_enrich.media):
if m.is_video() or m.is_audio():
# TODO: this used to pass all storage items to store now
# Now only passing S3, the rest will get added later in the usual order (?)
# Only storing S3, the rest will get added later in the usual order (?)
m.store(url=url, metadata=to_enrich, storages=[self.s3])
try:
job_id = self.submit_job(m)

Wyświetl plik

@ -133,14 +133,6 @@ def unpickle():
return _unpickle
@pytest.fixture
def mock_python_dependencies():
with patch("auto_archiver.core.module") as mock_check_python_dep:
# Mock all Python dependencies as available
mock_check_python_dep.return_value = True
yield mock_check_python_dep
@pytest.fixture
def mock_binary_dependencies():
with patch("shutil.which") as mock_shutil_which:

Plik binarny nie jest wyświetlany.

Plik binarny nie jest wyświetlany.

Wyświetl plik

@ -14,7 +14,7 @@ def mock_media():
@pytest.fixture
def enricher(setup_module):
def enricher(setup_module, mock_binary_dependencies):
return setup_module("metadata_enricher", {})
@ -74,3 +74,16 @@ def test_get_metadata_error_handling(mock_run, mock_logger_error, enricher):
result = enricher.get_metadata("test.jpg")
assert result == {}
mock_logger_error.assert_called_once()
@pytest.mark.skip(reason="Requires ExifTool to be installed. TODO mock")
def test_metadata_pickle(enricher, unpickle):
    """Enrich a pickled YouTube-short Metadata and compare it with the stored expected pickle.

    Currently skipped: the test requires ExifTool to be installed until that
    dependency is mocked.
    """
    enriched = unpickle("tests/data/metadata/metadata_enricher_ytshort_input.pickle")
    reference = unpickle("tests/data/metadata/metadata_enricher_ytshort_expected.pickle")

    # enrich() is called for its effect on `enriched`; its return value is not used.
    enricher.enrich(enriched)

    # Go from the loosest check to the strictest, so a failure points at the
    # narrowest difference first: media count, first media's metadata, whole object.
    assert len(reference.media) == len(enriched.media)
    assert enriched.media[0].properties.get("metadata") == reference.media[0].properties.get("metadata")
    assert enriched == reference

Wyświetl plik

@ -38,11 +38,13 @@ def test_empty_metadata(metadata, enricher):
def test_ssl_enrich(metadata, enricher):
    """Check that enriching fetches the certificate, writes it to a .pem file and adds it as media."""
    cert_patch = patch("ssl.get_server_certificate", return_value="TEST_CERT")
    open_patch = patch("builtins.open", mock_open())

    with cert_patch as mock_get_cert, open_patch as mock_file:
        initial_media_count = len(metadata.media)

        enricher.enrich(metadata)

        # The certificate is requested exactly once, for example.com on port 443.
        mock_get_cert.assert_called_once_with(("example.com", 443))

        # It is written in text mode to a single file inside the enricher's tmp dir.
        mock_file.assert_called_once_with(f"{enricher.tmp_dir}/example-com.pem", "w")
        mock_file().write.assert_called_once_with("TEST_CERT")

        # Exactly one media item was added, and a media item points at the certificate file.
        assert len(metadata.media) == initial_media_count + 1
        assert any(media.filename.endswith("example-com.pem") for media in metadata.media)

Wyświetl plik

@ -5,12 +5,12 @@ from auto_archiver.modules.thumbnail_enricher import ThumbnailEnricher
@pytest.fixture
def thumbnail_enricher(setup_module) -> ThumbnailEnricher:
configs: dict = {
def thumbnail_enricher(setup_module, mock_binary_dependencies) -> ThumbnailEnricher:
config: dict = {
"thumbnails_per_minute": 60,
"max_thumbnails": 4,
}
return setup_module("thumbnail_enricher", configs)
return setup_module("thumbnail_enricher", config)
@pytest.fixture

Wyświetl plik

@ -8,6 +8,9 @@ from auto_archiver.modules.s3_storage import S3Storage
from auto_archiver.modules.whisper_enricher import WhisperEnricher
TEST_S3_URL = "http://cdn.example.com/test.mp4"
@pytest.fixture
def enricher():
"""Fixture with mocked S3 and API dependencies"""
@ -20,7 +23,7 @@ def enricher():
"steps": {"storages": ["s3_storage"]}
}
mock_s3 = MagicMock(spec=S3Storage)
mock_s3.get_cdn_url.return_value = "http://s3.example.com/media.mp3"
mock_s3.get_cdn_url.return_value = TEST_S3_URL
instance = WhisperEnricher()
instance.name = "whisper_enricher"
instance.display_name = "Whisper Enricher"
@ -53,7 +56,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests):
"""Test successful media processing with S3 configured"""
whisper, mock_s3 = enricher
# Configure mock S3 URL to match test expectation
mock_s3.get_cdn_url.return_value = "http://cdn.example.com/test.mp4"
mock_s3.get_cdn_url.return_value = TEST_S3_URL
# Create test media with matching CDN URL
m = Media("test.mp4")
@ -78,6 +81,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests):
mock_status_response, # First call: status check
mock_artifacts_response # Second call: artifacts check
]
# Run enrichment (without opening file)
whisper.enrich(metadata)
# Check API interactions
@ -89,5 +93,43 @@ def test_successful_job_submission(enricher, metadata, mock_requests):
# Verify job status checks
assert mock_requests.get.call_count == 2
assert "artifact_0_text" in metadata.media[0].get("whisper_model")
assert "test transcript" in metadata.metadata.get("content")
assert metadata.media[0].get("whisper_model") == {'artifact_0_text': 'test transcript', 'job_artifacts_check': 'http://testapi/jobs/job123/artifacts', 'job_id': 'job123', 'job_status_check': 'http://testapi/jobs/job123'}
def test_submit_job(enricher):
    """submit_job returns the job id from the API's JSON body when the POST answers 201."""
    whisper, _ = enricher

    media = Media("test.mp4")
    media.add_url(TEST_S3_URL)

    # Stand-in for the API reply: HTTP 201 with a JSON body carrying the job id.
    api_reply = MagicMock(status_code=201)
    api_reply.json.return_value = {"id": "job123"}

    with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests:
        mock_requests.post.return_value = api_reply
        job_id = whisper.submit_job(media)

    assert job_id == "job123"
def test_submit_raises_status(enricher):
    """submit_job raises an AssertionError naming the API and status code on a 400 reply."""
    whisper, _ = enricher

    media = Media("test.mp4")
    media.add_url(TEST_S3_URL)

    # Stand-in for the API reply: a non-success status, body otherwise well-formed.
    api_reply = MagicMock(status_code=400)
    api_reply.json.return_value = {"id": "job123"}

    with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests:
        mock_requests.post.return_value = api_reply
        with pytest.raises(AssertionError) as exc_info:
            whisper.submit_job(media)

    # Checked after the raises block: a statement placed after the raising call
    # inside that block would never execute.
    assert str(exc_info.value) == "calling the whisper api http://testapi returned a non-success code: 400"
# TODO: parametrize this test over several non-S3 URLs.
def test_submit_job_fails(enricher):
    """submit_job raises an AssertionError when the media only has a non-S3 URL.

    The media URL used here differs from TEST_S3_URL; presumably submit_job
    asserts that one of the media URLs matches the S3 CDN URL (verify against
    WhisperEnricher.submit_job).
    """
    # The S3 mock from the fixture is not needed directly in this test.
    whisper, _ = enricher

    m = Media("test.mp4")
    m.add_url("http://cdn.wrongurl.com/test.mp4")

    with pytest.raises(AssertionError):
        whisper.submit_job(m)

Wyświetl plik

@ -162,4 +162,25 @@ def test_get_context():
def test_choose_most_complete():
    """choose_most_complete returns the first of two candidates that differ only in title, content text and a context entry."""
    # First candidate: title, content and url.
    m_more = Metadata()
    m_more.set_title("Title 1")
    m_more.set_content("Content 1")
    m_more.set_url("https://example.com")

    # Second candidate: the same three fields plus one context entry.
    m_less = Metadata()
    m_less.set_title("Title 2")
    m_less.set_content("Content 2")
    m_less.set_url("https://example.com")
    m_less.set_context("key", "value")

    chosen = Metadata.choose_most_complete([m_more, m_less])

    # The first candidate is expected to be picked.
    assert chosen.metadata.get("title") == "Title 1"
def test_choose_most_complete_from_pickles(unpickle):
    """choose_most_complete prefers the enriched Metadata over its pre-enrichment pickle.

    Uses pickles taken before and after an enricher has run. Only the media of
    the chosen result is compared, not the whole Metadata.
    """
    # Repo-relative paths, so the test does not depend on one developer's
    # checkout location (assumes pytest is run from the repository root).
    m_before_enriching = unpickle("tests/data/metadata/metadata_enricher_ytshort_input.pickle")
    m_after_enriching = unpickle("tests/data/metadata/metadata_enricher_ytshort_expected.pickle")

    # An empty Metadata goes first so that both pickles fall in results[1:],
    # the part that choose_most_complete iterates (`for r in results[1:]:`).
    res = Metadata.choose_most_complete([Metadata(), m_after_enriching, m_before_enriching])

    assert res.media == m_after_enriching.media