kopia lustrzana https://github.com/bellingcat/auto-archiver
Add unit tests for html_formatter, csv_db
rodzic
2353f9d6a5
commit
bbef80de4c
|
@ -2,16 +2,15 @@ name: Core Tests
|
|||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, staging ]
|
||||
branches: [ main ]
|
||||
paths:
|
||||
- src/**
|
||||
pull_request:
|
||||
branches: [ main, staging ]
|
||||
paths:
|
||||
- src/**
|
||||
|
||||
jobs:
|
||||
test_back:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
|
|
|
@ -11,7 +11,7 @@ on:
|
|||
- src/**
|
||||
|
||||
jobs:
|
||||
test_back:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
|
@ -21,11 +21,28 @@ jobs:
|
|||
working-directory: ./
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install Poetry
|
||||
uses: snok/install-poetry@v1
|
||||
with:
|
||||
virtualenvs-create: true
|
||||
virtualenvs-in-project: true
|
||||
virtualenvs-path: .venv
|
||||
- name: Load cached venv
|
||||
id: cached-poetry-dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: .venv
|
||||
# Key the cached virtualenv on the matrix Python version. No step in this job
# declares `id: setup-python`, so `steps.setup-python.outputs.python-version`
# would expand to an empty string and every matrix entry would share one cache.
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-dev
|
||||
- name: Install dependencies
|
||||
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||
run: poetry install --no-interaction --no-root
|
||||
|
||||
- name: Install Package (Local)
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
|
|
|
@ -1,4 +1,10 @@
|
|||
import unittest
|
||||
import tempfile
|
||||
|
||||
from auto_archiver.core.context import ArchivingContext
|
||||
|
||||
# Test-package bootstrap: runs once at import time, before any test module.
# Reset the shared ArchivingContext (full reset requested explicitly).
ArchivingContext.reset(full_reset=True)
|
||||
# Point the context's temporary directory at the platform temp dir reported by tempfile.
ArchivingContext.set_tmp_dir(tempfile.gettempdir())
|
||||
|
||||
# Allow running this module directly as a script via unittest's CLI entry point.
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -1,7 +0,0 @@
|
|||
import tempfile
|
||||
|
||||
from auto_archiver.core.context import ArchivingContext
|
||||
|
||||
|
||||
ArchivingContext.reset(full_reset=True)
|
||||
ArchivingContext.set_tmp_dir(tempfile.gettempdir())
|
|
@ -60,6 +60,16 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
|||
assert not username
|
||||
assert not tweet_id
|
||||
|
||||
def test_choose_variants(self):
    """Check which entry ``choose_variant`` returns for a fixed list of video variants.

    The fixture holds one HLS playlist entry (no bitrate) and three MP4
    renditions; the test expects the last entry (bitrate 2176000, 1280x720)
    to be the one returned.
    """
    # taken from the response for url https://x.com/bellingcat/status/1871552600346415571
    hls_playlist = {'content_type': 'application/x-mpegURL', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/pl/ovWo7ux-bKROwYIC.m3u8?tag=12&v=e1b'}
    mp4_270p = {'bitrate': 256000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/480x270/OqZIrKV0LFswMvxS.mp4?tag=12'}
    mp4_360p = {'bitrate': 832000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/640x360/uiDZDSmZ8MZn9hsi.mp4?tag=12'}
    mp4_720p = {'bitrate': 2176000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/1280x720/6Y340Esh568WZnRZ.mp4?tag=12'}
    candidates = [hls_playlist, mp4_270p, mp4_360p, mp4_720p]

    picked = self.archiver.choose_variant(candidates)

    assert picked == mp4_720p
|
||||
|
||||
@pytest.mark.download
|
||||
def test_youtube_dlp_archiver(self):
|
||||
|
||||
|
@ -86,6 +96,17 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
|||
datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
|
||||
"twitter-ytdl"
|
||||
)
|
||||
|
||||
@pytest.mark.download
def test_download_video(self):
    """Run the archiver's ``download`` on a fixed tweet URL and validate the result.

    The returned object is checked with ``assertValidResponseMetadata`` against
    the expected text and a UTC timestamp of 2024-12-24 13:44:46.
    """
    url = "https://x.com/bellingcat/status/1871552600346415571"
    expected_text = "This month's Bellingchat Premium is with @KolinaKoltai. She reveals how she investigated a platform allowing users to create AI-generated child sexual abuse material and explains why it's crucial to investigate the people behind these services https://t.co/SfBUq0hSD0 https://t.co/rIHx0WlKp8"
    expected_time = datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc)

    item = self.create_item(url)
    result = self.archiver.download(item)

    self.assertValidResponseMetadata(result, expected_text, expected_time)
|
||||
|
||||
@pytest.mark.download
|
||||
def test_download_sensitive_media(self):
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
import tempfile
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from auto_archiver.databases.csv_db import CSVDb
|
||||
from auto_archiver.core import Metadata
|
||||
|
||||
|
||||
|
||||
class TestCSVdb(unittest.TestCase):
    """Unit tests for the ``CSVDb`` database backend."""

    def setUp(self):
        """Create an empty temporary CSV file and remember its path."""
        # mkstemp returns an *open* OS-level descriptor alongside the path.
        # Close it straight away: the test only needs the path, and a
        # descriptor left open leaks (and blocks os.remove on Windows).
        # mkstemp does not insert a dot before the suffix, hence ".csv".
        fd, temp_db = tempfile.mkstemp(suffix=".csv")
        os.close(fd)
        self.temp_db = temp_db

    def tearDown(self):
        """Delete the temporary CSV file created in ``setUp``."""
        os.remove(self.temp_db)

    def test_store_item(self):
        """``done`` should write a header row plus one row for the item."""
        db = CSVDb({
            "csv_db": {"csv_file": self.temp_db}
        })

        item = Metadata().set_url("http://example.com").set_title("Example").set_content("Example content").success("my-archiver")

        db.done(item)

        with open(self.temp_db, "r") as f:
            assert f.read().strip() == f"status,metadata,media\nmy-archiver: success,\"{{'_processed_at': {repr(item.get('_processed_at'))}, 'url': 'http://example.com', 'title': 'Example', 'content': 'Example content'}}\",[]"

        # TODO: csv db doesn't have a fetch method - need to add it (?)
        # assert db.fetch(item) == item
|
|
@ -0,0 +1,31 @@
|
|||
import unittest
|
||||
|
||||
from auto_archiver.core.context import ArchivingContext
|
||||
from auto_archiver.formatters.html_formatter import HtmlFormatter
|
||||
from auto_archiver.core import Metadata, Media
|
||||
|
||||
|
||||
class TestHTMLFormatter(unittest.TestCase):
    """Unit tests for ``HtmlFormatter``."""

    def setUp(self):
        """Force the hash algorithm setting to SHA-256 for the duration of a test."""
        # Remember the current setting on the test instance so tearDown can
        # restore it; avoids attaching an ad-hoc attribute to the shared
        # ArchivingContext class.
        self._prev_algorithm = ArchivingContext.get("hash_enricher.algorithm", "")
        ArchivingContext.set("hash_enricher.algorithm", "SHA-256")
        return super().setUp()

    def tearDown(self):
        """Restore the hash algorithm setting captured in ``setUp``."""
        ArchivingContext.set("hash_enricher.algorithm", self._prev_algorithm)
        return super().tearDown()

    def test_format(self):
        """``format`` should return an HTML ``Media`` containing the metadata content."""
        formatter = HtmlFormatter({})
        metadata = Metadata().set("content", "Hello, world!").set_url('https://example.com')

        final_media = formatter.format(metadata)
        self.assertIsInstance(final_media, Media)
        self.assertIn(".html", final_media.filename)
        # The formatter writes the page to disk; read it back to check the body.
        with open(final_media.filename, "r") as f:
            content = f.read()
        self.assertIn("Hello, world!", content)
        self.assertEqual("text/html", final_media.mimetype)
        self.assertIn("SHA-256:", final_media.get('hash'))
|
Ładowanie…
Reference in New Issue