diff --git a/poetry.lock b/poetry.lock index decadca..d61b908 100644 --- a/poetry.lock +++ b/poetry.lock @@ -172,18 +172,18 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.36.17" +version = "1.36.19" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.36.17-py3-none-any.whl", hash = "sha256:59bcf0c4b04d9cc36f8b418ad17ab3c4a99a21a175d2fad7096aa21cbe84630b"}, - {file = "boto3-1.36.17.tar.gz", hash = "sha256:5ecae20e780a3ce9afb3add532b61c466a8cb8960618e4fa565b3883064c1346"}, + {file = "boto3-1.36.19-py3-none-any.whl", hash = "sha256:7784590369a9d545bb07b2de56b6ce4d5a5e232883a957f704c3f842caeba155"}, + {file = "boto3-1.36.19.tar.gz", hash = "sha256:8c2c2a4ccdfe35dd2611ee1b7473dd2383948415c777e42dc4e7f1ebe371fe8c"}, ] [package.dependencies] -botocore = ">=1.36.17,<1.37.0" +botocore = ">=1.36.19,<1.37.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -192,14 +192,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.36.17" +version = "1.36.19" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.36.17-py3-none-any.whl", hash = "sha256:069858b2fd693548035d7fd53a774e37e4260fea64e0ac9b8a3aee904f9321df"}, - {file = "botocore-1.36.17.tar.gz", hash = "sha256:cec13e0a7ce78e71aad0b397581b4e81824c7981ef4c261d2e296d200c399b09"}, + {file = "botocore-1.36.19-py3-none-any.whl", hash = "sha256:98882c106fec4c08678ea028199f7f5119550fab95d682b30846f7aae04b7bec"}, + {file = "botocore-1.36.19.tar.gz", hash = "sha256:cdf6729f601f82b1acdb9004b1f88b57cfb470f576394cdb3bbf5150f7fafb5b"}, ] [package.dependencies] @@ -860,14 +860,14 @@ tool = ["click (>=6.0.0)"] [[package]] name = "googleapis-common-protos" -version = "1.66.0" +version = "1.67.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"}, - {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"}, + {file = "googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741"}, + {file = "googleapis_common_protos-1.67.0.tar.gz", hash = "sha256:21398025365f138be356d5923e9168737d94d46a72aefee4a6110a1f23463c86"}, ] [package.dependencies] @@ -1235,14 +1235,14 @@ files = [ [[package]] name = "myst-parser" -version = "4.0.0" +version = "4.0.1" description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser," optional = false python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"}, - {file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"}, + {file = 
"myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"}, + {file = "myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4"}, ] [package.dependencies] @@ -1254,10 +1254,10 @@ pyyaml = "*" sphinx = ">=7,<9" [package.extras] -code-style = ["pre-commit (>=3.0,<4.0)"] +code-style = ["pre-commit (>=4.0,<5.0)"] linkify = ["linkify-it-py (>=2.0,<3.0)"] rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] -testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"] +testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"] testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"] [[package]] diff --git a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py index 89579f9..917ab85 100644 --- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py +++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py @@ -29,8 +29,7 @@ class WhisperEnricher(Enricher): job_results = {} for i, m in enumerate(to_enrich.media): if m.is_video() or m.is_audio(): - # TODO: this used to pass all storage items to store now - # Now only passing S3, the rest will get added later in the usual order (?) + # Only storing S3, the rest will get added later in the usual order (?) 
m.store(url=url, metadata=to_enrich, storages=[self.s3]) try: job_id = self.submit_job(m) diff --git a/tests/conftest.py b/tests/conftest.py index d7f484f..f7ed4b7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -133,14 +133,6 @@ def unpickle(): return _unpickle -@pytest.fixture -def mock_python_dependencies(): - with patch("auto_archiver.core.module") as mock_check_python_dep: - # Mock all Python dependencies as available - mock_check_python_dep.return_value = True - yield mock_check_python_dep - - @pytest.fixture def mock_binary_dependencies(): with patch("shutil.which") as mock_shutil_which: diff --git a/tests/data/metadata/metadata_enricher_ytshort_expected.pickle b/tests/data/metadata/metadata_enricher_ytshort_expected.pickle new file mode 100644 index 0000000..23ce5f6 Binary files /dev/null and b/tests/data/metadata/metadata_enricher_ytshort_expected.pickle differ diff --git a/tests/data/metadata/metadata_enricher_ytshort_input.pickle b/tests/data/metadata/metadata_enricher_ytshort_input.pickle new file mode 100644 index 0000000..5f1a4eb Binary files /dev/null and b/tests/data/metadata/metadata_enricher_ytshort_input.pickle differ diff --git a/tests/enrichers/test_metadata_enricher.py b/tests/enrichers/test_metadata_enricher.py index 314fca7..9dc410b 100644 --- a/tests/enrichers/test_metadata_enricher.py +++ b/tests/enrichers/test_metadata_enricher.py @@ -14,7 +14,7 @@ def mock_media(): @pytest.fixture -def enricher(setup_module): +def enricher(setup_module, mock_binary_dependencies): return setup_module("metadata_enricher", {}) @@ -74,3 +74,16 @@ def test_get_metadata_error_handling(mock_run, mock_logger_error, enricher): result = enricher.get_metadata("test.jpg") assert result == {} mock_logger_error.assert_called_once() + + +@pytest.mark.skip(reason="Requires ExifTool to be installed. 
TODO mock") +def test_metadata_pickle(enricher, unpickle): + # Uses a pickle of a YouTube short + metadata = unpickle("tests/data/metadata/metadata_enricher_ytshort_input.pickle") + expected = unpickle("tests/data/metadata/metadata_enricher_ytshort_expected.pickle") + enricher.enrich(metadata) + expected_media = expected.media + actual_media = metadata.media + assert len(expected_media) == len(actual_media) + assert actual_media[0].properties.get("metadata") == expected_media[0].properties.get("metadata") + assert metadata == expected \ No newline at end of file diff --git a/tests/enrichers/test_ssl_enricher.py b/tests/enrichers/test_ssl_enricher.py index c4d2dc5..29775f2 100644 --- a/tests/enrichers/test_ssl_enricher.py +++ b/tests/enrichers/test_ssl_enricher.py @@ -38,11 +38,13 @@ def test_empty_metadata(metadata, enricher): def test_ssl_enrich(metadata, enricher): with patch("ssl.get_server_certificate", return_value="TEST_CERT"), \ patch("builtins.open", mock_open()) as mock_file: + media_len_before = len(metadata.media) enricher.enrich(metadata) ssl.get_server_certificate.assert_called_once_with(("example.com", 443)) mock_file.assert_called_once_with(f"{enricher.tmp_dir}/example-com.pem", "w") mock_file().write.assert_called_once_with("TEST_CERT") + assert len(metadata.media) == media_len_before + 1 # Ensure the certificate is added to metadata assert any(media.filename.endswith("example-com.pem") for media in metadata.media) diff --git a/tests/enrichers/test_thumbnail_enricher.py b/tests/enrichers/test_thumbnail_enricher.py index eb27b99..14cfa0e 100644 --- a/tests/enrichers/test_thumbnail_enricher.py +++ b/tests/enrichers/test_thumbnail_enricher.py @@ -5,12 +5,12 @@ from auto_archiver.modules.thumbnail_enricher import ThumbnailEnricher @pytest.fixture -def thumbnail_enricher(setup_module) -> ThumbnailEnricher: - configs: dict = { +def thumbnail_enricher(setup_module, mock_binary_dependencies) -> ThumbnailEnricher: + config: dict = { "thumbnails_per_minute": 
60, "max_thumbnails": 4, } - return setup_module("thumbnail_enricher", configs) + return setup_module("thumbnail_enricher", config) @pytest.fixture diff --git a/tests/enrichers/test_whisper_enricher.py b/tests/enrichers/test_whisper_enricher.py index 8a73ed7..873198f 100644 --- a/tests/enrichers/test_whisper_enricher.py +++ b/tests/enrichers/test_whisper_enricher.py @@ -8,6 +8,9 @@ from auto_archiver.modules.s3_storage import S3Storage from auto_archiver.modules.whisper_enricher import WhisperEnricher +TEST_S3_URL = "http://cdn.example.com/test.mp4" + + @pytest.fixture def enricher(): """Fixture with mocked S3 and API dependencies""" @@ -20,7 +23,7 @@ def enricher(): "steps": {"storages": ["s3_storage"]} } mock_s3 = MagicMock(spec=S3Storage) - mock_s3.get_cdn_url.return_value = "http://s3.example.com/media.mp3" + mock_s3.get_cdn_url.return_value = TEST_S3_URL instance = WhisperEnricher() instance.name = "whisper_enricher" instance.display_name = "Whisper Enricher" @@ -53,7 +56,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests): """Test successful media processing with S3 configured""" whisper, mock_s3 = enricher # Configure mock S3 URL to match test expectation - mock_s3.get_cdn_url.return_value = "http://cdn.example.com/test.mp4" + mock_s3.get_cdn_url.return_value = TEST_S3_URL # Create test media with matching CDN URL m = Media("test.mp4") @@ -78,6 +81,7 @@ def test_successful_job_submission(enricher, metadata, mock_requests): mock_status_response, # First call: status check mock_artifacts_response # Second call: artifacts check ] + # Run enrichment (without opening file) whisper.enrich(metadata) # Check API interactions @@ -89,5 +93,43 @@ def test_successful_job_submission(enricher, metadata, mock_requests): # Verify job status checks assert mock_requests.get.call_count == 2 assert "artifact_0_text" in metadata.media[0].get("whisper_model") - assert "test transcript" in metadata.metadata.get("content") + assert 
metadata.media[0].get("whisper_model") == {'artifact_0_text': 'test transcript', 'job_artifacts_check': 'http://testapi/jobs/job123/artifacts', 'job_id': 'job123', 'job_status_check': 'http://testapi/jobs/job123'} + + + +def test_submit_job(enricher): + """Test job submission method""" + whisper, _ = enricher + m = Media("test.mp4") + m.add_url(TEST_S3_URL) + with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests: + mock_response = MagicMock() + mock_response.status_code = 201 + mock_response.json.return_value = {"id": "job123"} + mock_requests.post.return_value = mock_response + job_id = whisper.submit_job(m) + assert job_id == "job123" + +def test_submit_raises_status(enricher): + whisper, _ = enricher + m = Media("test.mp4") + m.add_url(TEST_S3_URL) + with patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests") as mock_requests: + mock_response = MagicMock() + mock_response.status_code = 400 + mock_response.json.return_value = {"id": "job123"} + mock_requests.post.return_value = mock_response + with pytest.raises(AssertionError) as exc_info: + whisper.submit_job(m) + assert str(exc_info.value) == "calling the whisper api http://testapi returned a non-success code: 400" + +# @pytest.mark.parametrize("test_url, status", ["http://cdn.example.com/test.mp4",]) +def test_submit_job_fails(enricher): + """Test assertion fails with non-S3 URL""" + whisper, mock_s3 = enricher + m = Media("test.mp4") + m.add_url("http://cdn.wrongurl.com/test.mp4") + with pytest.raises(AssertionError): + whisper.submit_job(m) + diff --git a/tests/test_metadata.py b/tests/test_metadata.py index b07e107..a753936 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -162,4 +162,25 @@ def test_get_context(): def test_choose_most_complete(): - pass \ No newline at end of file + m_more = Metadata() + m_more.set_title("Title 1") + m_more.set_content("Content 1") + m_more.set_url("https://example.com") + + m_less = Metadata() 
+ m_less.set_title("Title 2") + m_less.set_content("Content 2") + m_less.set_url("https://example.com") + m_less.set_context("key", "value") + + res = Metadata.choose_most_complete([m_more, m_less]) + assert res.metadata.get("title") == "Title 1" + +def test_choose_most_complete_from_pickles(unpickle): + # test most complete from pickles before and after an enricher has run + # Only compares length of media, not the actual media + m_before_enriching = unpickle("tests/data/metadata/metadata_enricher_ytshort_input.pickle") + m_after_enriching = unpickle("tests/data/metadata/metadata_enricher_ytshort_expected.pickle") + # Iterates `for r in results[1:]:` + res = Metadata.choose_most_complete([Metadata(), m_after_enriching, m_before_enriching]) + assert res.media == m_after_enriching.media