Mirror of https://github.com/bellingcat/auto-archiver
Add more tests.
parent 3fce593aad
commit 319c1e8f92
@@ -172,18 +172,18 @@ lxml = ["lxml"]
 [[package]]
 name = "boto3"
-version = "1.36.17"
+version = "1.36.19"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "boto3-1.36.17-py3-none-any.whl", hash = "sha256:59bcf0c4b04d9cc36f8b418ad17ab3c4a99a21a175d2fad7096aa21cbe84630b"},
-    {file = "boto3-1.36.17.tar.gz", hash = "sha256:5ecae20e780a3ce9afb3add532b61c466a8cb8960618e4fa565b3883064c1346"},
+    {file = "boto3-1.36.19-py3-none-any.whl", hash = "sha256:7784590369a9d545bb07b2de56b6ce4d5a5e232883a957f704c3f842caeba155"},
+    {file = "boto3-1.36.19.tar.gz", hash = "sha256:8c2c2a4ccdfe35dd2611ee1b7473dd2383948415c777e42dc4e7f1ebe371fe8c"},
 ]

 [package.dependencies]
-botocore = ">=1.36.17,<1.37.0"
+botocore = ">=1.36.19,<1.37.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.11.0,<0.12.0"
@@ -192,14 +192,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
 [[package]]
 name = "botocore"
-version = "1.36.17"
+version = "1.36.19"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "botocore-1.36.17-py3-none-any.whl", hash = "sha256:069858b2fd693548035d7fd53a774e37e4260fea64e0ac9b8a3aee904f9321df"},
-    {file = "botocore-1.36.17.tar.gz", hash = "sha256:cec13e0a7ce78e71aad0b397581b4e81824c7981ef4c261d2e296d200c399b09"},
+    {file = "botocore-1.36.19-py3-none-any.whl", hash = "sha256:98882c106fec4c08678ea028199f7f5119550fab95d682b30846f7aae04b7bec"},
+    {file = "botocore-1.36.19.tar.gz", hash = "sha256:cdf6729f601f82b1acdb9004b1f88b57cfb470f576394cdb3bbf5150f7fafb5b"},
 ]

 [package.dependencies]
@@ -860,14 +860,14 @@ tool = ["click (>=6.0.0)"]
 [[package]]
 name = "googleapis-common-protos"
-version = "1.66.0"
+version = "1.67.0"
 description = "Common protobufs used in Google APIs"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"},
-    {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"},
+    {file = "googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741"},
+    {file = "googleapis_common_protos-1.67.0.tar.gz", hash = "sha256:21398025365f138be356d5923e9168737d94d46a72aefee4a6110a1f23463c86"},
 ]

 [package.dependencies]
@@ -1235,14 +1235,14 @@ files = [
 [[package]]
 name = "myst-parser"
-version = "4.0.0"
+version = "4.0.1"
 description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser,"
 optional = false
 python-versions = ">=3.10"
 groups = ["docs"]
 files = [
-    {file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"},
-    {file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"},
+    {file = "myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"},
+    {file = "myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4"},
 ]

 [package.dependencies]
@@ -1254,10 +1254,10 @@ pyyaml = "*"
 sphinx = ">=7,<9"

 [package.extras]
-code-style = ["pre-commit (>=3.0,<4.0)"]
+code-style = ["pre-commit (>=4.0,<5.0)"]
 linkify = ["linkify-it-py (>=2.0,<3.0)"]
 rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"]
-testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
+testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
 testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"]

 [[package]]
@@ -29,8 +29,7 @@ class WhisperEnricher(Enricher):
         job_results = {}
         for i, m in enumerate(to_enrich.media):
             if m.is_video() or m.is_audio():
-                # TODO: this used to pass all storage items to store now
-                # Now only passing S3, the rest will get added later in the usual order (?)
+                # Only storing S3, the rest will get added later in the usual order (?)
                 m.store(url=url, metadata=to_enrich, storages=[self.s3])
                 try:
                     job_id = self.submit_job(m)
@@ -133,14 +133,6 @@ def unpickle():
     return _unpickle


-@pytest.fixture
-def mock_python_dependencies():
-    with patch("auto_archiver.core.module") as mock_check_python_dep:
-        # Mock all Python dependencies as available
-        mock_check_python_dep.return_value = True
-        yield mock_check_python_dep
-
-
 @pytest.fixture
 def mock_binary_dependencies():
     with patch("shutil.which") as mock_shutil_which:
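The new mock_binary_dependencies fixture is cut off after the patch line in the hunk above. For orientation, here is a minimal, self-contained sketch of how such a fixture typically looks and how a test consumes it; only the patch target ("shutil.which") comes from the diff, while the yielded return value ("/usr/bin/exiftool") and the example test are assumptions.

from unittest.mock import patch

import pytest
import shutil


@pytest.fixture
def mock_binary_dependencies():
    # Pretend every binary dependency is on PATH so module setup does not fail.
    with patch("shutil.which") as mock_shutil_which:
        mock_shutil_which.return_value = "/usr/bin/exiftool"  # assumed stand-in path
        yield mock_shutil_which


def test_binary_lookup_is_mocked(mock_binary_dependencies):
    # Any which() lookup now succeeds without the real binary being installed.
    assert shutil.which("exiftool") == "/usr/bin/exiftool"
    mock_binary_dependencies.assert_called_with("exiftool")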
Binary file not shown.
Binary file not shown.
@@ -14,7 +14,7 @@ def mock_media():


 @pytest.fixture
-def enricher(setup_module):
+def enricher(setup_module, mock_binary_dependencies):
     return setup_module("metadata_enricher", {})

@@ -74,3 +74,16 @@ def test_get_metadata_error_handling(mock_run, mock_logger_error, enricher):
     result = enricher.get_metadata("test.jpg")
     assert result == {}
     mock_logger_error.assert_called_once()
+
+
+@pytest.mark.skip(reason="Requires ExifTool to be installed. TODO mock")
+def test_metadata_pickle(enricher, unpickle):
+    # Uses a pickle of a YouTube short
+    metadata = unpickle("tests/data/metadata/metadata_enricher_ytshort_input.pickle")
+    expected = unpickle("tests/data/metadata/metadata_enricher_ytshort_expected.pickle")
+    enricher.enrich(metadata)
+    expected_media = expected.media
+    actual_media = metadata.media
+    assert len(expected_media) == len(actual_media)
+    assert actual_media[0].properties.get("metadata") == expected_media[0].properties.get("metadata")
+    assert metadata == expected
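test_metadata_pickle is skipped because it needs a real ExifTool install ("TODO mock"). One way to close that TODO, sketched below, is to patch subprocess.run — which get_metadata presumably shells out to — with canned exiftool output; the output format and the final assertion are assumptions, while the enricher/unpickle fixtures and the pickle path come from the tests above.

from unittest.mock import MagicMock, patch


def fake_exiftool_run(*args, **kwargs):
    # Canned stand-in for a successful `exiftool <file>` invocation; the
    # "Key : Value" layout is an assumption about what get_metadata parses.
    completed = MagicMock()
    completed.returncode = 0
    completed.stdout = "Image Width                     : 1080\nDuration                        : 0:00:17\n"
    return completed


def test_metadata_pickle_mocked(enricher, unpickle):
    metadata = unpickle("tests/data/metadata/metadata_enricher_ytshort_input.pickle")
    with patch("subprocess.run", side_effect=fake_exiftool_run):
        enricher.enrich(metadata)
    # With ExifTool mocked out, the enricher should still attach some metadata.
    assert metadata.media[0].properties.get("metadata")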
@@ -38,11 +38,13 @@ def test_empty_metadata(metadata, enricher):
 def test_ssl_enrich(metadata, enricher):
     with patch("ssl.get_server_certificate", return_value="TEST_CERT"), \
             patch("builtins.open", mock_open()) as mock_file:
+        media_len_before = len(metadata.media)
         enricher.enrich(metadata)

         ssl.get_server_certificate.assert_called_once_with(("example.com", 443))
         mock_file.assert_called_once_with(f"{enricher.tmp_dir}/example-com.pem", "w")
         mock_file().write.assert_called_once_with("TEST_CERT")
+        assert len(metadata.media) == media_len_before + 1
         # Ensure the certificate is added to metadata
         assert any(media.filename.endswith("example-com.pem") for media in metadata.media)

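For orientation, a sketch of the enrich() behaviour that test_ssl_enrich pins down: fetch the certificate for the archived URL's host on port 443, write it to <tmp_dir>/<host-with-dashes>.pem, and attach that file as a new media item. Only the patched calls and assertions above are taken from the test; the import path and the Media/add_media API used here are assumptions.

import ssl
from urllib.parse import urlparse

from auto_archiver.core import Media, Metadata  # assumed import path


def enrich_sketch(self, to_enrich: Metadata) -> None:
    domain = urlparse(to_enrich.get_url()).netloc
    certificate = ssl.get_server_certificate((domain, 443))
    cert_path = f"{self.tmp_dir}/{domain.replace('.', '-')}.pem"
    with open(cert_path, "w") as f:
        f.write(certificate)
    # Attach the written certificate so it is archived alongside the other media.
    to_enrich.add_media(Media(filename=cert_path))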
@@ -5,12 +5,12 @@ from auto_archiver.modules.thumbnail_enricher import ThumbnailEnricher


 @pytest.fixture
-def thumbnail_enricher(setup_module) -> ThumbnailEnricher:
-    configs: dict = {
+def thumbnail_enricher(setup_module, mock_binary_dependencies) -> ThumbnailEnricher:
+    config: dict = {
         "thumbnails_per_minute": 60,
         "max_thumbnails": 4,
     }
-    return setup_module("thumbnail_enricher", configs)
+    return setup_module("thumbnail_enricher", config)


 @pytest.fixture
@@ -8,6 +8,9 @@ from auto_archiver.modules.s3_storage import S3Storage
 from auto_archiver.modules.whisper_enricher import WhisperEnricher


+TEST_S3_URL = "http://cdn.example.com/test.mp4"
+
+
 @pytest.fixture
 def enricher():
     """Fixture with mocked S3 and API dependencies"""
@@ -20,7 +23,7 @@ def enricher():
         "steps": {"storages": ["s3_storage"]}
     }
     mock_s3 = MagicMock(spec=S3Storage)
-    mock_s3.get_cdn_url.return_value = "http://s3.example.com/media.mp3"
+    mock_s3.get_cdn_url.return_value = TEST_S3_URL
    instance = WhisperEnricher()
    instance.name = "whisper_enricher"
    instance.display_name = "Whisper Enricher"
@@ -53,7 +56,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests):
     """Test successful media processing with S3 configured"""
     whisper, mock_s3 = enricher
     # Configure mock S3 URL to match test expectation
-    mock_s3.get_cdn_url.return_value = "http://cdn.example.com/test.mp4"
+    mock_s3.get_cdn_url.return_value = TEST_S3_URL

     # Create test media with matching CDN URL
     m = Media("test.mp4")
@@ -78,6 +81,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests):
         mock_status_response,  # First call: status check
         mock_artifacts_response  # Second call: artifacts check
     ]
+
     # Run enrichment (without opening file)
     whisper.enrich(metadata)
     # Check API interactions
@@ -89,5 +93,43 @@ def test_successful_job_submission(enricher, metadata, mock_requests):
     # Verify job status checks
     assert mock_requests.get.call_count == 2
-    assert "artifact_0_text" in metadata.media[0].get("whisper_model")
-    assert "test transcript" in metadata.metadata.get("content")
+    assert metadata.media[0].get("whisper_model") == {'artifact_0_text': 'test transcript', 'job_artifacts_check': 'http://testapi/jobs/job123/artifacts', 'job_id': 'job123', 'job_status_check': 'http://testapi/jobs/job123'}
+
+
+def test_submit_job(enricher):
+    """Test job submission method"""
+    whisper, _ = enricher
+    m = Media("test.mp4")
+    m.add_url(TEST_S3_URL)
+    with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests:
+        mock_response = MagicMock()
+        mock_response.status_code = 201
+        mock_response.json.return_value = {"id": "job123"}
+        mock_requests.post.return_value = mock_response
+        job_id = whisper.submit_job(m)
+        assert job_id == "job123"
+
+
+def test_submit_raises_status(enricher):
+    whisper, _ = enricher
+    m = Media("test.mp4")
+    m.add_url(TEST_S3_URL)
+    with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests:
+        mock_response = MagicMock()
+        mock_response.status_code = 400
+        mock_response.json.return_value = {"id": "job123"}
+        mock_requests.post.return_value = mock_response
+        with pytest.raises(AssertionError) as exc_info:
+            whisper.submit_job(m)
+        assert str(exc_info.value) == "calling the whisper api http://testapi returned a non-success code: 400"
+
+
+# @pytest.mark.parametrize("test_url, status", ["http://cdn.example.com/test.mp4",])
+def test_submit_job_fails(enricher):
+    """Test assertion fails with non-S3 URL"""
+    whisper, mock_s3 = enricher
+    m = Media("test.mp4")
+    m.add_url("http://cdn.wrongurl.com/test.mp4")
+    with pytest.raises(AssertionError):
+        whisper.submit_job(m)
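Taken together, these tests pin down submit_job's contract: POST the media's stored S3/CDN URL to the Whisper API, treat anything other than a success status as an assertion failure with the exact message checked above, reject media that was not stored in the configured S3 storage, and return the job id from the JSON body. Below is a sketch of that contract, not the module's actual code; the endpoint path, payload shape, URL check, and the media.urls accessor are assumptions.

import requests


class WhisperEnricherSketch:
    api_endpoint = "http://testapi"  # matches the endpoint in the assertion message above
    api_key = "dummy-key"            # assumed

    def submit_job(self, media):
        cdn_url = media.urls[0]  # assumed accessor for the URL added via add_url()
        # test_submit_job_fails expects non-S3 URLs to be rejected with an AssertionError.
        assert cdn_url == "http://cdn.example.com/test.mp4", f"{cdn_url} is not stored in the configured S3 storage"
        response = requests.post(
            f"{self.api_endpoint}/jobs",                         # assumed path
            json={"url": cdn_url},                               # assumed payload
            headers={"Authorization": f"Bearer {self.api_key}"},
        )
        assert response.status_code == 201, (
            f"calling the whisper api {self.api_endpoint} returned a non-success code: {response.status_code}"
        )
        return response.json()["id"]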
@@ -162,4 +162,25 @@ def test_get_context():


 def test_choose_most_complete():
-    pass
+    m_more = Metadata()
+    m_more.set_title("Title 1")
+    m_more.set_content("Content 1")
+    m_more.set_url("https://example.com")
+
+    m_less = Metadata()
+    m_less.set_title("Title 2")
+    m_less.set_content("Content 2")
+    m_less.set_url("https://example.com")
+    m_less.set_context("key", "value")
+
+    res = Metadata.choose_most_complete([m_more, m_less])
+    assert res.metadata.get("title") == "Title 1"
+
+
+def test_choose_most_complete_from_pickles(unpickle):
+    # test most complete from pickles before and after an enricher has run
+    # Only compares length of media, not the actual media
+    m_before_enriching = unpickle("tests/data/metadata/metadata_enricher_ytshort_input.pickle")
+    m_after_enriching = unpickle("tests/data/metadata/metadata_enricher_ytshort_expected.pickle")
+    # Iterates `for r in results[1:]:`
+    res = Metadata.choose_most_complete([Metadata(), m_after_enriching, m_before_enriching])
+    assert res.media == m_after_enriching.media
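The comment about `for r in results[1:]:` hints at how choose_most_complete walks its candidates. A rough sketch of selection logic that would satisfy both tests above follows: start from the first result and replace it only when a later result carries strictly more media, keeping the earlier result on ties. This is an illustration consistent with the assertions, not the method's actual implementation, and it assumes each result exposes a .media list.

def choose_most_complete_sketch(results):
    # "Most complete" here is judged by how many media items a result carries.
    most_complete = results[0]
    for r in results[1:]:  # the loop the test comment refers to
        if len(r.media) > len(most_complete.media):
            most_complete = r
    return most_complete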