kopia lustrzana https://github.com/bellingcat/auto-archiver
178 wiersze
7.2 KiB
Python
178 wiersze
7.2 KiB
Python
from datetime import datetime, timezone
|
|
import time
|
|
import pytest
|
|
import yt_dlp
|
|
|
|
from auto_archiver.modules.generic_extractor.generic_extractor import GenericExtractor
|
|
from auto_archiver.modules.generic_extractor.tiktok import Tiktok, TikTokIE
|
|
|
|
from .test_extractor_base import TestExtractorBase
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def skip_ytdlp_own_methods(mocker):
    """Patch the extractor so the tests in this module skip the ytdlp download.

    Runs automatically for every test in this module (``autouse=True``).
    """
    # Make Tiktok.skip_ytdlp_download always answer True, so that we skip the
    # ytdlp download in these tests.
    mocker.patch(
        "auto_archiver.modules.generic_extractor.tiktok.Tiktok.skip_ytdlp_download",
        return_value=True,
    )

    # Only hand back the yt-dlp extractor(s) whose IE_NAME is exactly "TikTok".
    tiktok_only = [ie for ie in yt_dlp.YoutubeDL()._ies.values() if ie.IE_NAME == "TikTok"]
    mocker.patch(
        "auto_archiver.modules.generic_extractor.generic_extractor.GenericExtractor.suitable_extractors",
        return_value=tiktok_only,
    )
|
|
|
|
|
|
@pytest.fixture
def mock_get(mocker):
    """Patch ``requests.get`` as referenced by the tiktok module and return the mock."""
    patched_get = mocker.patch("auto_archiver.modules.generic_extractor.tiktok.requests.get")
    return patched_get
|
|
|
|
|
|
@pytest.fixture
def tiktok_dropin() -> Tiktok:
    """Provide a ``Tiktok`` instance constructed with no arguments."""
    dropin = Tiktok()
    return dropin
|
|
|
|
|
|
class TestTiktokTikwmExtractor(TestExtractorBase):
    """
    Tests for TikTok URLs archived through the generic extractor.

    Most tests replace ``requests.get`` with a mock (``mock_get`` fixture) and
    check the result of ``self.extractor.download`` together with the log
    messages that mention tikwm.com. Tests marked ``download`` call
    ``download`` without that mock.
    """

    extractor_module = "generic_extractor"
    extractor: GenericExtractor

    config = {}

    VALID_EXAMPLE_URL = "https://www.tiktok.com/@example/video/1234"

    def _download_example(self, make_item):
        """Run ``self.extractor.download`` on an item built from ``VALID_EXAMPLE_URL``."""
        example_item = make_item(self.VALID_EXAMPLE_URL)
        return self.extractor.download(example_item)

    @pytest.mark.parametrize(
        "url, is_suitable",
        [
            ("https://bellingcat.com", False),
            ("https://youtube.com", False),
            ("https://tiktok.co/", False),
            ("https://tiktok.com/", False),
            ("https://www.tiktok.com/", False),
            ("https://api.cool.tiktok.com/", False),
            (VALID_EXAMPLE_URL, True),
            ("https://www.tiktok.com/@bbcnews/video/7478038212070411542", True),
            ("https://www.tiktok.com/@ggs68taiwan.official/video/7441821351142362375", True),
            ("https://www.tiktok.com/t/ZP8YQ8e5j/", True),
            ("https://vt.tiktok.com/ZSMTJeqRP/", True),
        ],
    )
    def test_is_suitable(self, url, is_suitable, tiktok_dropin):
        """``Tiktok.suitable`` matches the expected flag for each URL."""
        verdict = tiktok_dropin.suitable(url, TikTokIE())
        assert verdict == is_suitable

    def test_invalid_json_responses(self, mock_get, make_item, caplog):
        """``download`` returns False when the mocked response's ``json()`` raises."""
        response_mock = mock_get.return_value
        parse_failure_log = (
            "failed to parse JSON response from tikwm.com for url='https://www.tiktok.com/@example/video/1234'"
        )

        # First attempt: json() raises ValueError.
        response_mock.status_code = 200
        response_mock.json.side_effect = ValueError
        with caplog.at_level("DEBUG"):
            outcome = self._download_example(make_item)
            assert outcome is False
            mock_get.assert_called_once()
            response_mock.json.assert_called_once()
            # first message is just the 'Skipping using ytdlp to download files for TikTok' message
            assert parse_failure_log in caplog.text

        # Second attempt: json() raises a plain Exception; counters are now cumulative.
        response_mock.json.side_effect = Exception
        with caplog.at_level("ERROR"):
            outcome = self._download_example(make_item)
            assert outcome is False
            mock_get.assert_called()
            assert mock_get.call_count == 2
            assert response_mock.json.call_count == 2
            # NOTE(review): caplog.text is not cleared between the two attempts, so
            # this check is already satisfied by the first attempt's log output.
            assert parse_failure_log in caplog.text

    @pytest.mark.parametrize(
        "response",
        [
            {"msg": "failure"},
            {"msg": "success"},
        ],
    )
    def test_unsuccessful_responses(self, mock_get, make_item, response, caplog):
        """Payloads without usable content make ``download`` return False and log it."""
        response_mock = mock_get.return_value
        response_mock.status_code = 200
        response_mock.json.return_value = response

        with caplog.at_level("DEBUG"):
            outcome = self._download_example(make_item)
            assert outcome is False
            mock_get.assert_called_once()
            response_mock.json.assert_called_once()
            assert "failed to get a valid response from tikwm.com" in caplog.text

    @pytest.mark.parametrize(
        "response,has_vid",
        [
            ({"data": {"id": 123}}, False),
            ({"data": {"wmplay": "url"}}, True),
            ({"data": {"play": "url"}}, True),
        ],
    )
    def test_correct_extraction(self, mock_get, make_item, response, has_vid, mocker):
        """A successful payload yields one media entry only when it carries a video key."""
        response_mock = mock_get.return_value
        response_mock.status_code = 200
        response_mock.json.return_value = {"msg": "success", **response}

        result = self._download_example(make_item)
        if has_vid:
            assert result.is_success()
            assert len(result.media) == 1
        else:
            assert result is False

        # One requests.get call is always expected, plus one more when a video
        # key is present (presumably the media fetch - confirm against the extractor).
        expected_get_calls = 2 if has_vid else 1
        mock_get.assert_called()
        assert mock_get.call_count == expected_get_calls
        response_mock.json.assert_called_once()

    def test_correct_data_extracted(self, mock_get, make_item):
        """Fields of the mocked payload end up on the returned result."""
        api_payload = {
            "wmplay": "url",
            "origin_cover": "cover.jpg",
            "title": "Title",
            "id": 123,
            "duration": 60,
            "create_time": 1736301699,
            "author": "Author",
            "other": "data",
        }
        response_mock = mock_get.return_value
        response_mock.status_code = 200
        response_mock.json.return_value = {"msg": "success", "data": api_payload}

        result = self._download_example(make_item)

        assert result.is_success()
        assert len(result.media) == 2
        assert result.get_title() == "Title"
        assert result.get("author") == "Author"
        # Only the keys not checked individually remain under "api_data".
        assert result.get("api_data") == {"other": "data", "id": 123}
        assert result.media[1].get("duration") == 60
        expected_timestamp = datetime.fromtimestamp(1736301699, tz=timezone.utc)
        assert result.get("timestamp") == expected_timestamp

    @pytest.mark.download
    def test_download_video(self, make_item):
        """Archive a BBC News TikTok video without mocking ``requests.get``."""
        video_url = "https://www.tiktok.com/@bbcnews/video/7478038212070411542"
        expected_title = "The A23a iceberg is one of the world's oldest and it's so big you can see it from space. #Iceberg #A23a #Antarctica #Ice #ClimateChange #DavidAttenborough #Ocean #Sea #SouthGeorgia #BBCNews "

        result = self.extractor.download(make_item(video_url))

        assert result.is_success()
        assert len(result.media) == 2
        assert result.get_title() == expected_title
        author = result.get("author")
        assert author.get("unique_id") == "bbcnews"
        api_data = result.get("api_data")
        assert api_data.get("id") == "7478038212070411542"
        assert result.media[1].get("duration") == 59
        expected_timestamp = datetime.fromtimestamp(1741122000, tz=timezone.utc)
        assert result.get("timestamp") == expected_timestamp

    @pytest.mark.download
    def test_download_sensitive_video(self, make_item):
        """Archive the ggs68taiwan.official TikTok video without mocking ``requests.get``."""
        video_url = "https://www.tiktok.com/@ggs68taiwan.official/video/7441821351142362375"
        # Required for rate limiting
        time.sleep(1.1)

        result = self.extractor.download(make_item(video_url))

        assert result.is_success()
        assert len(result.media) == 2
        assert result.get_title() == "Căng nhất lúc này #ggs68 #ggs68taiwan #taiwan #dailoan #tiktoknews"
        author = result.get("author")
        assert author.get("id") == "7197400619475649562"
        api_data = result.get("api_data")
        assert api_data.get("id") == "7441821351142362375"
        assert result.media[1].get("duration") == 34
        expected_timestamp = datetime.fromtimestamp(1732684060, tz=timezone.utc)
        assert result.get("timestamp") == expected_timestamp
|