kopia lustrzana https://github.com/bellingcat/auto-archiver
Add unit tests for html_formatter, csv_db
rodzic
2353f9d6a5
commit
bbef80de4c
|
@ -2,16 +2,15 @@ name: Core Tests
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ main, staging ]
|
branches: [ main ]
|
||||||
paths:
|
paths:
|
||||||
- src/**
|
- src/**
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ main, staging ]
|
|
||||||
paths:
|
paths:
|
||||||
- src/**
|
- src/**
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test_back:
|
tests:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
|
|
|
@ -11,7 +11,7 @@ on:
|
||||||
- src/**
|
- src/**
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test_back:
|
tests:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
|
@ -21,11 +21,28 @@ jobs:
|
||||||
working-directory: ./
|
working-directory: ./
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
|
- name: Install Poetry
|
||||||
|
uses: snok/install-poetry@v1
|
||||||
|
with:
|
||||||
|
virtualenvs-create: true
|
||||||
|
virtualenvs-in-project: true
|
||||||
|
virtualenvs-path: .venv
|
||||||
|
- name: Load cached venv
|
||||||
|
id: cached-poetry-dependencies
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: .venv
|
||||||
|
# Key on the matrix Python version: the "Set up Python" step declares no
# `id: setup-python`, so `steps.setup-python.outputs.python-version` would
# expand to an empty string and every matrix entry would share one cache.
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-dev
|
||||||
|
- name: Install dependencies
|
||||||
|
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||||
|
run: poetry install --no-interaction --no-root
|
||||||
|
|
||||||
- name: Install Package (Local)
|
- name: Install Package (Local)
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
|
|
|
@ -1,4 +1,10 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
from auto_archiver.core.context import ArchivingContext
|
||||||
|
|
||||||
|
ArchivingContext.reset(full_reset=True)
|
||||||
|
ArchivingContext.set_tmp_dir(tempfile.gettempdir())
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
|
@ -1,7 +0,0 @@
|
||||||
import tempfile
|
|
||||||
|
|
||||||
from auto_archiver.core.context import ArchivingContext
|
|
||||||
|
|
||||||
|
|
||||||
ArchivingContext.reset(full_reset=True)
|
|
||||||
ArchivingContext.set_tmp_dir(tempfile.gettempdir())
|
|
|
@ -60,6 +60,16 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
||||||
assert not username
|
assert not username
|
||||||
assert not tweet_id
|
assert not tweet_id
|
||||||
|
|
||||||
|
def test_choose_variants(self):
    """Feed a recorded variant list to ``choose_variant`` and expect its fourth entry back.

    The fourth entry is the mp4 with the largest ``bitrate`` (2176000) in the list.
    """
    # taken from the response for url https://x.com/bellingcat/status/1871552600346415571
    hls_playlist = {'content_type': 'application/x-mpegURL', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/pl/ovWo7ux-bKROwYIC.m3u8?tag=12&v=e1b'}
    mp4_270p = {'bitrate': 256000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/480x270/OqZIrKV0LFswMvxS.mp4?tag=12'}
    mp4_360p = {'bitrate': 832000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/640x360/uiDZDSmZ8MZn9hsi.mp4?tag=12'}
    mp4_720p = {'bitrate': 2176000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/1280x720/6Y340Esh568WZnRZ.mp4?tag=12'}
    variants = [hls_playlist, mp4_270p, mp4_360p, mp4_720p]

    picked = self.archiver.choose_variant(variants)

    # Index after the call, exactly as the comparison was originally written.
    expected = variants[3]
    assert picked == expected
|
||||||
|
|
||||||
@pytest.mark.download
|
@pytest.mark.download
|
||||||
def test_youtube_dlp_archiver(self):
|
def test_youtube_dlp_archiver(self):
|
||||||
|
|
||||||
|
@ -87,6 +97,17 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase):
|
||||||
"twitter-ytdl"
|
"twitter-ytdl"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@pytest.mark.download
def test_download_video(self):
    """Download a recorded Bellingcat post and validate the returned metadata.

    The result of ``self.archiver.download`` is handed to
    ``assertValidResponseMetadata`` together with the expected post text
    and the expected UTC timestamp.
    """
    expected_text = "This month's Bellingchat Premium is with @KolinaKoltai. She reveals how she investigated a platform allowing users to create AI-generated child sexual abuse material and explains why it's crucial to investigate the people behind these services https://t.co/SfBUq0hSD0 https://t.co/rIHx0WlKp8"
    expected_timestamp = datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc)

    item = self.create_item("https://x.com/bellingcat/status/1871552600346415571")
    post = self.archiver.download(item)

    self.assertValidResponseMetadata(post, expected_text, expected_timestamp)
|
||||||
|
|
||||||
@pytest.mark.download
|
@pytest.mark.download
|
||||||
def test_download_sensitive_media(self):
|
def test_download_sensitive_media(self):
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from auto_archiver.databases.csv_db import CSVDb
|
||||||
|
from auto_archiver.core import Metadata
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TestCSVdb(unittest.TestCase):
    """Exercise ``CSVDb`` against a throwaway CSV file on disk."""

    def setUp(self):
        """Create an empty temporary file and remember its path."""
        # mkstemp returns an already-open OS-level descriptor along with the
        # path. Only the path is used below, so close the descriptor right
        # away instead of leaking it (an open handle also makes os.remove
        # fail on Windows).
        fd, temp_db = tempfile.mkstemp(suffix=".csv")
        os.close(fd)
        self.temp_db = temp_db

    def tearDown(self):
        """Delete the temporary file created in ``setUp``."""
        os.remove(self.temp_db)

    def test_store_item(self):
        """``done`` should write a header row plus one row describing the item."""
        db = CSVDb({
            "csv_db": {"csv_file": self.temp_db}
        })

        item = Metadata().set_url("http://example.com").set_title("Example").set_content("Example content").success("my-archiver")

        db.done(item)

        with open(self.temp_db, "r") as f:
            assert f.read().strip() == f"status,metadata,media\nmy-archiver: success,\"{{'_processed_at': {repr(item.get('_processed_at'))}, 'url': 'http://example.com', 'title': 'Example', 'content': 'Example content'}}\",[]"

        # TODO: csv db doesn't have a fetch method - need to add it (?)
        # assert db.fetch(item) == item
|
|
@ -0,0 +1,31 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from auto_archiver.core.context import ArchivingContext
|
||||||
|
from auto_archiver.formatters.html_formatter import HtmlFormatter
|
||||||
|
from auto_archiver.core import Metadata, Media
|
||||||
|
|
||||||
|
|
||||||
|
class TestHTMLFormatter(unittest.TestCase):
    """Check that ``HtmlFormatter.format`` produces an HTML ``Media`` for a ``Metadata`` item."""

    def setUp(self):
        """Force the ``hash_enricher.algorithm`` context value to SHA-256 for the test."""
        # Keep the previous value on this test instance rather than attaching
        # an ad-hoc attribute to the shared ArchivingContext class.
        self.prev_algorithm = ArchivingContext.get("hash_enricher.algorithm", "")
        ArchivingContext.set("hash_enricher.algorithm", "SHA-256")
        return super().setUp()

    def tearDown(self):
        """Put back the ``hash_enricher.algorithm`` value saved in ``setUp``."""
        ArchivingContext.set("hash_enricher.algorithm", self.prev_algorithm)
        return super().tearDown()

    def test_format(self):
        """Format a minimal item and inspect the resulting file, mimetype and hash."""
        formatter = HtmlFormatter({})
        metadata = Metadata().set("content", "Hello, world!").set_url('https://example.com')

        final_media = formatter.format(metadata)
        self.assertIsInstance(final_media, Media)
        self.assertIn(".html", final_media.filename)
        with open(final_media.filename, "r") as f:
            content = f.read()
            self.assertIn("Hello, world!", content)
        self.assertEqual("text/html", final_media.mimetype)
        self.assertIn("SHA-256:", final_media.get('hash'))
|
Ładowanie…
Reference in New Issue