Add unit tests for html_formatter, csv_db

2025-01-13 17:58:10 +01:00 · 2025-01-13 17:58:10 +01:00 · bbef80de4c
commit bbef80de4c
--- a/.github/workflows/tests-core.yaml
+++ b/.github/workflows/tests-core.yaml
@ -2,16 +2,15 @@ name: Core Tests

 on:
  push:
-    branches: [ main, staging ]
+    branches: [ main ]
    paths:
      - src/**
  pull_request:
-    branches: [ main, staging ]
    paths:
      - src/**

 jobs:
-  test_back:
+  tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
--- a/.github/workflows/tests-download.yaml
+++ b/.github/workflows/tests-download.yaml
@ -11,7 +11,7 @@ on:
      - src/**

 jobs:
-  test_back:
+  tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
@ -21,11 +21,28 @@ jobs:
        working-directory: ./

    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          virtualenvs-path: .venv
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v4
+        with:  # key on the matrix version: no step in this job has the id "setup-python"
+          path: .venv
+          key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-dev
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: poetry install --no-interaction --no-root
+
      - name: Install Package (Local)
        run: |
          python -m pip install --upgrade pip
--- a/tests/init.py
+++ b/tests/init.py
@ -1,4 +1,10 @@
 import unittest
+import tempfile
+
+from auto_archiver.core.context import ArchivingContext
+
+ArchivingContext.reset(full_reset=True)  # full reset of the shared context when the tests package is imported
+ArchivingContext.set_tmp_dir(tempfile.gettempdir())  # point the context's tmp dir at the system temp directory

 if __name__ == '__main__':
    unittest.main()
--- a/tests/archivers/init.py
+++ b/tests/archivers/init.py
@ -1,7 +0,0 @@
-import tempfile
-
-from auto_archiver.core.context import ArchivingContext
-
-
-ArchivingContext.reset(full_reset=True)
-ArchivingContext.set_tmp_dir(tempfile.gettempdir())
--- a/tests/archivers/test_twitter_archiver.py
+++ b/tests/archivers/test_twitter_archiver.py
@ -60,6 +60,16 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
        assert not username
        assert not tweet_id

+    def test_choose_variants(self):
+        # Variants copied from the response for https://x.com/bellingcat/status/1871552600346415571; the expected pick is the last entry (the highest-bitrate video/mp4).
+        variant_list = [{'content_type': 'application/x-mpegURL', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/pl/ovWo7ux-bKROwYIC.m3u8?tag=12&v=e1b'},
+                        {'bitrate': 256000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/480x270/OqZIrKV0LFswMvxS.mp4?tag=12'},
+                        {'bitrate': 832000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/640x360/uiDZDSmZ8MZn9hsi.mp4?tag=12'},
+                        {'bitrate': 2176000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/1280x720/6Y340Esh568WZnRZ.mp4?tag=12'}
+                        ]
+        chosen_variant = self.archiver.choose_variant(variant_list)
+        assert chosen_variant == variant_list[3]
+
    @pytest.mark.download
    def test_youtube_dlp_archiver(self):

@ -86,6 +96,17 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
            datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
            "twitter-ytdl"
        )
+    
+    @pytest.mark.download
+    def test_download_video(self):
+        """Download the tweet at `url` and pass the expected text and timestamp to assertValidResponseMetadata."""
+        url = "https://x.com/bellingcat/status/1871552600346415571"
+        post = self.archiver.download(self.create_item(url))
+        self.assertValidResponseMetadata(
+            post,
+            "This month's Bellingchat Premium is with @KolinaKoltai. She reveals how she investigated a platform allowing users to create AI-generated child sexual abuse material and explains why it's crucial to investigate the people behind these services https://t.co/SfBUq0hSD0 https://t.co/rIHx0WlKp8",
+            datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc)
+        )

    @pytest.mark.download
    def test_download_sensitive_media(self):
--- a/tests/databases/init.py
+++ b/tests/databases/init.py
--- a/tests/databases/test_csv_db.py
+++ b/tests/databases/test_csv_db.py
@ -0,0 +1,32 @@
+import tempfile
+import os
+import unittest
+
+from auto_archiver.databases.csv_db import CSVDb
+from auto_archiver.core import Metadata
+
+
+
+class TestCSVdb(unittest.TestCase):
+    """Checks that CSVDb.done() writes the header and the item's row to its CSV file."""
+
+    def setUp(self):
+        # mkstemp returns an already-open descriptor; close it so it is not leaked.
+        fd, self.temp_db = tempfile.mkstemp(suffix=".csv")
+        os.close(fd)
+
+    def tearDown(self):
+        os.remove(self.temp_db)
+
+    def test_store_item(self):
+        db = CSVDb({"csv_db": {"csv_file": self.temp_db}})
+
+        item = Metadata().set_url("http://example.com").set_title("Example").set_content("Example content").success("my-archiver")
+
+        db.done(item)
+
+        with open(self.temp_db, "r") as f:
+            assert f.read().strip() == f"status,metadata,media\nmy-archiver: success,\"{{'_processed_at': {repr(item.get('_processed_at'))}, 'url': 'http://example.com', 'title': 'Example', 'content': 'Example content'}}\",[]"
+
+        # TODO: csv db doesn't have a fetch method - need to add it (?)
+        # assert db.fetch(item) == item
--- a/tests/formatters/init.py
+++ b/tests/formatters/init.py
--- a/tests/formatters/test_html_formatter.py
+++ b/tests/formatters/test_html_formatter.py
@ -0,0 +1,31 @@
+import unittest
+
+from auto_archiver.core.context import ArchivingContext
+from auto_archiver.formatters.html_formatter import HtmlFormatter
+from auto_archiver.core import Metadata, Media
+
+
+class TestHTMLFormatter(unittest.TestCase):
+    """Checks that HtmlFormatter.format() turns a Metadata item into an HTML Media file."""
+
+    def setUp(self):
+        self._prev_algorithm = ArchivingContext.get("hash_enricher.algorithm", "")
+        ArchivingContext.set("hash_enricher.algorithm", "SHA-256")
+        super().setUp()
+
+    def tearDown(self):
+        ArchivingContext.set("hash_enricher.algorithm", self._prev_algorithm)  # restore the pre-test value
+        super().tearDown()
+
+    def test_format(self):
+        formatter = HtmlFormatter({})
+        metadata = Metadata().set("content", "Hello, world!").set_url('https://example.com')
+
+        final_media = formatter.format(metadata)
+        self.assertIsInstance(final_media, Media)
+        self.assertIn(".html", final_media.filename)
+        with open(final_media.filename, "r") as f:
+            content = f.read()
+        self.assertIn("Hello, world!", content)
+        self.assertEqual("text/html", final_media.mimetype)
+        self.assertIn("SHA-256:", final_media.get('hash'))