Add unit tests for html_formatter, csv_db

pull/163/head
Patrick Robertson 2025-01-13 17:58:10 +01:00
rodzic 2353f9d6a5
commit bbef80de4c
9 zmienionych plików z 112 dodań i 13 usunięć

Wyświetl plik

@ -2,16 +2,15 @@ name: Core Tests
on:
push:
branches: [ main, staging ]
branches: [ main ]
paths:
- src/**
pull_request:
branches: [ main, staging ]
paths:
- src/**
jobs:
test_back:
tests:
runs-on: ubuntu-latest
strategy:
matrix:

Wyświetl plik

@ -11,7 +11,7 @@ on:
- src/**
jobs:
test_back:
tests:
runs-on: ubuntu-latest
strategy:
matrix:
@ -21,11 +21,28 @@ jobs:
working-directory: ./
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
virtualenvs-path: .venv
- name: Load cached venv
  id: cached-poetry-dependencies
  uses: actions/cache@v4
  with:
    path: .venv
    # Key on the matrix Python version. The visible "Set up Python" step has no
    # `id: setup-python`, so `steps.setup-python.outputs.python-version` would
    # expand to an empty string and all matrix entries would share one cached
    # virtualenv built for whichever interpreter populated it first.
    key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-dev
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root
- name: Install Package (Local)
run: |
python -m pip install --upgrade pip

Wyświetl plik

@ -1,4 +1,10 @@
import unittest
import tempfile
from auto_archiver.core.context import ArchivingContext
# Package-level test bootstrap: request a full reset of the shared
# ArchivingContext, then hand it the system temp directory as its tmp dir.
ArchivingContext.reset(full_reset=True)
system_tmp_dir = tempfile.gettempdir()
ArchivingContext.set_tmp_dir(system_tmp_dir)

# Allow running the test package directly as a script.
if __name__ == '__main__':
    unittest.main()

Wyświetl plik

@ -1,7 +0,0 @@
import tempfile
from auto_archiver.core.context import ArchivingContext
# Request a full reset of the shared ArchivingContext, then set its temp dir
# to the path returned by tempfile.gettempdir().
ArchivingContext.reset(full_reset=True)
ArchivingContext.set_tmp_dir(tempfile.gettempdir())

Wyświetl plik

@ -60,6 +60,16 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
assert not username
assert not tweet_id
def test_choose_variants(self):
    """Check that ``choose_variant`` returns the 1280x720 MP4 entry.

    Of the four candidates below, that entry (index 3 in the list handed to
    the archiver) carries the largest ``bitrate`` value.
    """
    # taken from the response for url https://x.com/bellingcat/status/1871552600346415571
    hls_playlist = {'content_type': 'application/x-mpegURL', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/pl/ovWo7ux-bKROwYIC.m3u8?tag=12&v=e1b'}
    mp4_480x270 = {'bitrate': 256000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/480x270/OqZIrKV0LFswMvxS.mp4?tag=12'}
    mp4_640x360 = {'bitrate': 832000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/640x360/uiDZDSmZ8MZn9hsi.mp4?tag=12'}
    mp4_1280x720 = {'bitrate': 2176000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/1280x720/6Y340Esh568WZnRZ.mp4?tag=12'}
    candidates = [hls_playlist, mp4_480x270, mp4_640x360, mp4_1280x720]

    selected = self.archiver.choose_variant(candidates)

    assert selected == candidates[3]
@pytest.mark.download
def test_youtube_dlp_archiver(self):
@ -86,6 +96,17 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
"twitter-ytdl"
)
@pytest.mark.download
def test_download_video(self):
    """Download a Bellingcat tweet and validate the metadata that comes back.

    Marked ``download`` because it calls the archiver's ``download`` on a
    live x.com URL.
    """
    tweet_url = "https://x.com/bellingcat/status/1871552600346415571"
    expected_text = "This month's Bellingchat Premium is with @KolinaKoltai. She reveals how she investigated a platform allowing users to create AI-generated child sexual abuse material and explains why it's crucial to investigate the people behind these services https://t.co/SfBUq0hSD0 https://t.co/rIHx0WlKp8"
    expected_timestamp = datetime.datetime(
        2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc
    )

    item = self.create_item(tweet_url)
    result = self.archiver.download(item)

    self.assertValidResponseMetadata(result, expected_text, expected_timestamp)
@pytest.mark.download
def test_download_sensitive_media(self):

Wyświetl plik

Wyświetl plik

@ -0,0 +1,32 @@
import tempfile
import os
import unittest
from auto_archiver.databases.csv_db import CSVDb
from auto_archiver.core import Metadata
class TestCSVdb(unittest.TestCase):
    """Tests for ``CSVDb`` that write to a throwaway temporary CSV file."""

    def setUp(self):
        """Create an empty temporary file and remember its path."""
        # mkstemp returns an already-open OS-level descriptor together with the
        # path. Close it immediately: the test only needs the path, and leaving
        # it open leaks a descriptor per test (and blocks os.remove on Windows).
        # mkstemp does not insert a dot before the suffix, so spell it ".csv".
        fd, temp_db = tempfile.mkstemp(suffix=".csv")
        os.close(fd)
        self.temp_db = temp_db

    def tearDown(self):
        """Delete the temporary file created in ``setUp``."""
        os.remove(self.temp_db)

    def test_store_item(self):
        """``done`` should write a header row plus one row describing the item."""
        db = CSVDb({
            "csv_db": {"csv_file": self.temp_db}
        })

        item = Metadata().set_url("http://example.com").set_title("Example").set_content("Example content").success("my-archiver")

        db.done(item)

        with open(self.temp_db, "r") as f:
            assert f.read().strip() == f"status,metadata,media\nmy-archiver: success,\"{{'_processed_at': {repr(item.get('_processed_at'))}, 'url': 'http://example.com', 'title': 'Example', 'content': 'Example content'}}\",[]"

        # TODO: csv db doesn't have a fetch method - need to add it (?)
        # assert db.fetch(item) == item

Wyświetl plik

Wyświetl plik

@ -0,0 +1,31 @@
import unittest
from auto_archiver.core.context import ArchivingContext
from auto_archiver.formatters.html_formatter import HtmlFormatter
from auto_archiver.core import Metadata, Media
class TestHTMLFormatter(unittest.TestCase):
    """Tests for ``HtmlFormatter``."""

    def setUp(self):
        """Force the hash algorithm setting to SHA-256 for the test."""
        # Keep the previous value on this test instance rather than attaching a
        # scratch attribute to the shared ArchivingContext class, so no extra
        # state is left on the global context if a test is interrupted.
        self._prev_algorithm = ArchivingContext.get("hash_enricher.algorithm", "")
        ArchivingContext.set("hash_enricher.algorithm", "SHA-256")
        return super().setUp()

    def tearDown(self):
        """Put back the hash algorithm setting saved by ``setUp``."""
        ArchivingContext.set("hash_enricher.algorithm", self._prev_algorithm)
        return super().tearDown()

    def test_format(self):
        """``format`` should return an HTML ``Media`` containing the item's content."""
        formatter = HtmlFormatter({})
        metadata = Metadata().set("content", "Hello, world!").set_url('https://example.com')

        final_media = formatter.format(metadata)
        self.assertIsInstance(final_media, Media)
        self.assertIn(".html", final_media.filename)
        with open(final_media.filename, "r") as f:
            content = f.read()
            self.assertIn("Hello, world!", content)
        self.assertEqual("text/html", final_media.mimetype)
        self.assertIn("SHA-256:", final_media.get('hash'))