From bbef80de4c29c78acfb5ef7064031416f87ce635 Mon Sep 17 00:00:00 2001
From: Patrick Robertson
Date: Mon, 13 Jan 2025 17:58:10 +0100
Subject: [PATCH] Add unit tests for html_formatter, csv_db

---
Review notes (this section is commentary, not part of the commit message):
 * tests-download.yaml: the venv cache key is built from matrix.python-version.
   No step declares `id: setup-python`, so steps.setup-python.outputs.* would
   expand to an empty string and every matrix entry would share one cache key.
 * tests/databases/test_csv_db.py: setUp closes the descriptor that
   tempfile.mkstemp returns instead of discarding it, and uses a ".csv" suffix.
 * The patch text was re-flowed to one diff line per line; indentation and
   blank context lines were reconstructed from the hunk headers.

 .github/workflows/tests-core.yaml        |  5 ++--
 .github/workflows/tests-download.yaml    | 23 ++++++++++++++---
 tests/__init__.py                        |  6 +++++
 tests/archivers/__init__.py              |  7 ------
 tests/archivers/test_twitter_archiver.py | 21 ++++++++++++++++
 tests/databases/__init__.py              |  0
 tests/databases/test_csv_db.py           | 32 ++++++++++++++++++++++++
 tests/formatters/__init__.py             |  0
 tests/formatters/test_html_formatter.py  | 31 +++++++++++++++++++++++
 9 files changed, 112 insertions(+), 13 deletions(-)
 create mode 100644 tests/databases/__init__.py
 create mode 100644 tests/databases/test_csv_db.py
 create mode 100644 tests/formatters/__init__.py
 create mode 100644 tests/formatters/test_html_formatter.py

diff --git a/.github/workflows/tests-core.yaml b/.github/workflows/tests-core.yaml
index 3cfbb22..dcdfb8f 100644
--- a/.github/workflows/tests-core.yaml
+++ b/.github/workflows/tests-core.yaml
@@ -2,16 +2,15 @@ name: Core Tests
 
 on:
   push:
-    branches: [ main, staging ]
+    branches: [ main ]
     paths:
       - src/**
   pull_request:
-    branches: [ main, staging ]
     paths:
       - src/**
 
 jobs:
-  test_back:
+  tests:
     runs-on: ubuntu-latest
     strategy:
       matrix:
diff --git a/.github/workflows/tests-download.yaml b/.github/workflows/tests-download.yaml
index 83b83c3..1dd7a1b 100644
--- a/.github/workflows/tests-download.yaml
+++ b/.github/workflows/tests-download.yaml
@@ -11,7 +11,7 @@ on:
       - src/**
 
 jobs:
-  test_back:
+  tests:
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -21,11 +21,28 @@ jobs:
         working-directory: ./
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          virtualenvs-path: .venv
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v4
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-dev
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: poetry install --no-interaction --no-root
+
       - name: Install Package (Local)
         run: |
           python -m pip install --upgrade pip
diff --git a/tests/__init__.py b/tests/__init__.py
index 1c35782..fe5c0da 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,4 +1,10 @@
 import unittest
+import tempfile
+
+from auto_archiver.core.context import ArchivingContext
+
+ArchivingContext.reset(full_reset=True)
+ArchivingContext.set_tmp_dir(tempfile.gettempdir())
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/tests/archivers/__init__.py b/tests/archivers/__init__.py
index 0a0357b..e69de29 100644
--- a/tests/archivers/__init__.py
+++ b/tests/archivers/__init__.py
@@ -1,7 +0,0 @@
-import tempfile
-
-from auto_archiver.core.context import ArchivingContext
-
-
-ArchivingContext.reset(full_reset=True)
-ArchivingContext.set_tmp_dir(tempfile.gettempdir())
\ No newline at end of file
diff --git a/tests/archivers/test_twitter_archiver.py b/tests/archivers/test_twitter_archiver.py
index d97509c..3eae3fb 100644
--- a/tests/archivers/test_twitter_archiver.py
+++ b/tests/archivers/test_twitter_archiver.py
@@ -60,6 +60,16 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
         assert not username
         assert not tweet_id
 
+    def test_choose_variants(self):
+        # taken from the response for url https://x.com/bellingcat/status/1871552600346415571
+        variant_list = [{'content_type': 'application/x-mpegURL', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/pl/ovWo7ux-bKROwYIC.m3u8?tag=12&v=e1b'},
+                        {'bitrate': 256000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/480x270/OqZIrKV0LFswMvxS.mp4?tag=12'},
+                        {'bitrate': 832000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/640x360/uiDZDSmZ8MZn9hsi.mp4?tag=12'},
+                        {'bitrate': 2176000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/1280x720/6Y340Esh568WZnRZ.mp4?tag=12'}
+                        ]
+        chosen_variant = self.archiver.choose_variant(variant_list)
+        assert chosen_variant == variant_list[3]
+
     @pytest.mark.download
     def test_youtube_dlp_archiver(self):
 
@@ -86,6 +96,17 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
             datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
             "twitter-ytdl"
         )
+
+    @pytest.mark.download
+    def test_download_video(self):
+        url = "https://x.com/bellingcat/status/1871552600346415571"
+
+        post = self.archiver.download(self.create_item(url))
+        self.assertValidResponseMetadata(
+            post,
+            "This month's Bellingchat Premium is with @KolinaKoltai. She reveals how she investigated a platform allowing users to create AI-generated child sexual abuse material and explains why it's crucial to investigate the people behind these services https://t.co/SfBUq0hSD0 https://t.co/rIHx0WlKp8",
+            datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc)
+        )
 
     @pytest.mark.download
     def test_download_sensitive_media(self):
diff --git a/tests/databases/__init__.py b/tests/databases/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/databases/test_csv_db.py b/tests/databases/test_csv_db.py
new file mode 100644
index 0000000..c4539b9
--- /dev/null
+++ b/tests/databases/test_csv_db.py
@@ -0,0 +1,32 @@
+import tempfile
+import os
+import unittest
+
+from auto_archiver.databases.csv_db import CSVDb
+from auto_archiver.core import Metadata
+
+
+
+class TestCSVdb(unittest.TestCase):
+
+    def setUp(self):
+        fd, self.temp_db = tempfile.mkstemp(suffix=".csv")
+        os.close(fd)  # mkstemp returns an already-open descriptor; close it so it is not leaked
+
+    def tearDown(self):
+        os.remove(self.temp_db)
+
+    def test_store_item(self):
+        db = CSVDb({
+            "csv_db": {"csv_file": self.temp_db}
+        })
+
+        item = Metadata().set_url("http://example.com").set_title("Example").set_content("Example content").success("my-archiver")
+
+        db.done(item)
+
+        with open(self.temp_db, "r") as f:
+            assert f.read().strip() == f"status,metadata,media\nmy-archiver: success,\"{{'_processed_at': {repr(item.get('_processed_at'))}, 'url': 'http://example.com', 'title': 'Example', 'content': 'Example content'}}\",[]"
+
+        # TODO: csv db doesn't have a fetch method - need to add it (?)
+        # assert db.fetch(item) == item
\ No newline at end of file
diff --git a/tests/formatters/__init__.py b/tests/formatters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/formatters/test_html_formatter.py b/tests/formatters/test_html_formatter.py
new file mode 100644
index 0000000..13fedf4
--- /dev/null
+++ b/tests/formatters/test_html_formatter.py
@@ -0,0 +1,31 @@
+import unittest
+
+from auto_archiver.core.context import ArchivingContext
+from auto_archiver.formatters.html_formatter import HtmlFormatter
+from auto_archiver.core import Metadata, Media
+
+
+class TestHTMLFormatter(unittest.TestCase):
+
+    def setUp(self):
+        ArchivingContext.prev_algorithm = ArchivingContext.get("hash_enricher.algorithm", "")
+        ArchivingContext.set("hash_enricher.algorithm", "SHA-256")
+        return super().setUp()
+
+    def tearDown(self):
+        ArchivingContext.set("hash_enricher.algorithm", ArchivingContext.prev_algorithm)
+        del ArchivingContext.prev_algorithm
+        return super().tearDown()
+
+    def test_format(self):
+        formatter = HtmlFormatter({})
+        metadata = Metadata().set("content", "Hello, world!").set_url('https://example.com')
+
+        final_media = formatter.format(metadata)
+        self.assertIsInstance(final_media, Media)
+        self.assertIn(".html", final_media.filename)
+        with open(final_media.filename, "r") as f:
+            content = f.read()
+        self.assertIn("Hello, world!", content)
+        self.assertEqual("text/html", final_media.mimetype)
+        self.assertIn("SHA-256:", final_media.get('hash'))
\ No newline at end of file