2025-03-10 11:56:45 +00:00
from datetime import datetime , timezone
import time
import pytest
2025-03-13 15:59:42 +00:00
import yt_dlp
2025-03-10 11:56:45 +00:00
2025-03-13 15:59:42 +00:00
from auto_archiver . modules . generic_extractor . generic_extractor import GenericExtractor
2025-03-18 09:59:59 +00:00
from auto_archiver . modules . generic_extractor . tiktok import Tiktok , TikTokIE
2025-03-10 11:56:45 +00:00
from . test_extractor_base import TestExtractorBase
2025-03-13 15:59:42 +00:00
@pytest.fixture(autouse=True)
def skip_ytdlp_own_methods(mocker):
    """Autouse fixture that keeps yt-dlp's own download path out of these tests.

    Two patches are installed for every test in this module:

    * ``Tiktok.skip_ytdlp_download`` is forced to return ``True``.
    * ``GenericExtractor.suitable_extractors`` is forced to return only the
      yt-dlp extractors whose ``IE_NAME`` is ``"TikTok"``.
    """
    # mock this method, so that we skip the ytdlp download in these tests
    mocker.patch(
        "auto_archiver.modules.generic_extractor.tiktok.Tiktok.skip_ytdlp_download",
        return_value=True,
    )

    # NOTE: `_ies` is a private attribute of `yt_dlp.YoutubeDL`; it is read here
    # only to pick out the TikTok extractor instance(s) by name.
    tiktok_extractors = [ie for ie in yt_dlp.YoutubeDL()._ies.values() if ie.IE_NAME == "TikTok"]
    mocker.patch(
        "auto_archiver.modules.generic_extractor.generic_extractor.GenericExtractor.suitable_extractors",
        return_value=tiktok_extractors,
    )
2025-03-13 15:59:42 +00:00
2025-03-18 09:59:59 +00:00
@pytest.fixture
def mock_get(mocker):
    """Patch ``requests.get`` as referenced from the tiktok drop-in module.

    Returns the mock that replaces it, so tests can configure
    ``mock_get.return_value`` (the fake response) and assert on call counts.
    """
    return mocker.patch("auto_archiver.modules.generic_extractor.tiktok.requests.get")
2025-03-14 10:50:13 +00:00
2025-03-18 09:59:59 +00:00
@pytest.fixture
def tiktok_dropin() -> Tiktok:
    """Provide a fresh ``Tiktok`` drop-in instance built with no arguments."""
    return Tiktok()
2025-03-10 11:56:45 +00:00
class TestTiktokTikwmExtractor(TestExtractorBase):
    """
    Tests for TikTok handling in the generic extractor via tikwm.com.

    Most tests replace ``requests.get`` (see the ``mock_get`` fixture) and feed
    the extractor canned API responses; the tests marked ``download`` call
    ``download`` without that mock.
    """

    extractor_module = "generic_extractor"
    extractor: GenericExtractor

    config = {}

    VALID_EXAMPLE_URL = "https://www.tiktok.com/@example/video/1234"

    @pytest.mark.parametrize(
        "url, is_suitable",
        [
            ("https://bellingcat.com", False),
            ("https://youtube.com", False),
            ("https://tiktok.co/", False),
            ("https://tiktok.com/", False),
            ("https://www.tiktok.com/", False),
            ("https://api.cool.tiktok.com/", False),
            (VALID_EXAMPLE_URL, True),
            ("https://www.tiktok.com/@bbcnews/video/7478038212070411542", True),
            ("https://www.tiktok.com/@ggs68taiwan.official/video/7441821351142362375", True),
            ("https://www.tiktok.com/t/ZP8YQ8e5j/", True),
            ("https://vt.tiktok.com/ZSMTJeqRP/", True),
        ],
    )
    def test_is_suitable(self, url, is_suitable, tiktok_dropin):
        """``Tiktok.suitable`` accepts video/short links and rejects everything else."""
        assert tiktok_dropin.suitable(url, TikTokIE()) == is_suitable

    def test_invalid_json_responses(self, mock_get, make_item, caplog):
        """A response whose ``.json()`` raises makes ``download`` return ``False``."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.side_effect = ValueError
        with caplog.at_level("DEBUG"):
            assert self.extractor.download(make_item(self.VALID_EXAMPLE_URL)) is False
        mock_get.assert_called_once()
        mock_get.return_value.json.assert_called_once()
        # first message is just the 'Skipping using ytdlp to download files for TikTok' message
        assert (
            "failed to parse JSON response from tikwm.com for url='https://www.tiktok.com/@example/video/1234'"
            in caplog.text
        )

        # Same scenario, but with a generic exception instead of ValueError.
        mock_get.return_value.json.side_effect = Exception
        with caplog.at_level("ERROR"):
            assert self.extractor.download(make_item(self.VALID_EXAMPLE_URL)) is False
        mock_get.assert_called()
        assert mock_get.call_count == 2
        assert mock_get.return_value.json.call_count == 2
        # NOTE(review): caplog still holds the records from the first call, so this
        # assertion is already satisfied before the second download runs - consider
        # clearing caplog in between once the expected log level is confirmed.
        assert (
            "failed to parse JSON response from tikwm.com for url='https://www.tiktok.com/@example/video/1234'"
            in caplog.text
        )

    @pytest.mark.parametrize(
        "response",
        [
            ({"msg": "failure"}),
            ({"msg": "success"}),
        ],
    )
    def test_unsuccessful_responses(self, mock_get, make_item, response, caplog):
        """Payloads without usable data (failure, or success with no ``data``) are rejected."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = response
        with caplog.at_level("DEBUG"):
            assert self.extractor.download(make_item(self.VALID_EXAMPLE_URL)) is False
        mock_get.assert_called_once()
        mock_get.return_value.json.assert_called_once()
        assert "failed to get a valid response from tikwm.com" in caplog.text

    @pytest.mark.parametrize(
        "response,has_vid",
        [
            ({"data": {"id": 123}}, False),
            ({"data": {"wmplay": "url"}}, True),
            ({"data": {"play": "url"}}, True),
        ],
    )
    def test_correct_extraction(self, mock_get, make_item, response, has_vid, mocker):
        """Extraction succeeds only when ``data`` carries a ``wmplay`` or ``play`` entry."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {"msg": "success", **response}
        result = self.extractor.download(make_item(self.VALID_EXAMPLE_URL))
        if not has_vid:
            assert result is False
        else:
            assert result.is_success()
            assert len(result.media) == 1
        mock_get.assert_called()
        # One GET for the API call, plus one more when a video entry is present
        # (presumably the media fetch - verify against the extractor).
        assert mock_get.call_count == 1 + int(has_vid)
        mock_get.return_value.json.assert_called_once()

    def test_correct_data_extracted(self, mock_get, make_item):
        """Fields of a successful payload are mapped onto the resulting metadata."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {
            "msg": "success",
            "data": {
                "wmplay": "url",
                "origin_cover": "cover.jpg",
                "title": "Title",
                "id": 123,
                "duration": 60,
                "create_time": 1736301699,
                "author": "Author",
                "other": "data",
            },
        }
        result = self.extractor.download(make_item(self.VALID_EXAMPLE_URL))
        assert result.is_success()
        assert len(result.media) == 2
        assert result.get_title() == "Title"
        assert result.get("author") == "Author"
        # Only the keys that were not consumed elsewhere remain in api_data.
        assert result.get("api_data") == {"other": "data", "id": 123}
        assert result.media[1].get("duration") == 60
        assert result.get("timestamp") == datetime.fromtimestamp(1736301699, tz=timezone.utc)

    @pytest.mark.download
    def test_download_video(self, make_item):
        """Download a public video without mocking ``requests.get`` (``download`` marker)."""
        url = "https://www.tiktok.com/@bbcnews/video/7478038212070411542"
        result = self.extractor.download(make_item(url))
        assert result.is_success()
        assert len(result.media) == 2
        assert (
            result.get_title()
            == "The A23a iceberg is one of the world's oldest and it's so big you can see it from space. #Iceberg #A23a #Antarctica #Ice #ClimateChange #DavidAttenborough #Ocean #Sea #SouthGeorgia #BBCNews"
        )
        assert result.get("author").get("unique_id") == "bbcnews"
        assert result.get("api_data").get("id") == "7478038212070411542"
        assert result.media[1].get("duration") == 59
        assert result.get("timestamp") == datetime.fromtimestamp(1741122000, tz=timezone.utc)

    @pytest.mark.download
    def test_download_sensitive_video(self, make_item):
        """Download a second video without mocking ``requests.get`` (``download`` marker)."""
        url = "https://www.tiktok.com/@ggs68taiwan.official/video/7441821351142362375"

        # Required for rate limiting
        time.sleep(1.1)

        result = self.extractor.download(make_item(url))
        assert result.is_success()
        assert len(result.media) == 2
        assert result.get_title() == "Căng nhất lúc này #ggs68 #ggs68taiwan #taiwan #dailoan #tiktoknews"
        assert result.get("author").get("id") == "7197400619475649562"
        assert result.get("api_data").get("id") == "7441821351142362375"
        assert result.media[1].get("duration") == 34
        assert result.get("timestamp") == datetime.fromtimestamp(1732684060, tz=timezone.utc)