[tiktok] Detect embeds

Closes #3799
pull/3812/head
pukkandan 2022-05-20 06:01:08 +05:30
rodzic 0b9c08b47b
commit b801cd7179
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 7EEE9E1E817D0A39
2 zmienionych plików z 24 dodań i 10 usunięć

Wyświetl plik

@ -74,6 +74,7 @@ from .teachable import TeachableIE
from .ted import TedEmbedIE
from .theplatform import ThePlatformIE
from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .tube8 import Tube8IE
from .tunein import TuneInBaseIE
@ -3756,6 +3757,11 @@ class GenericIE(InfoExtractor):
if ruutu_urls:
return self.playlist_from_matches(ruutu_urls, video_id, video_title)
# Look for TikTok embeds
tiktok_urls = TikTokIE._extract_urls(webpage)
if tiktok_urls:
return self.playlist_from_matches(tiktok_urls, video_id, video_title)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:

Wyświetl plik

@ -1,28 +1,26 @@
import itertools
import json
import random
import re
import string
import time
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse
)
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
HEADRequest,
LazyList,
UnsupportedError,
get_first,
int_or_none,
join_nonempty,
LazyList,
qualities,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_get,
url_or_none,
qualities,
)
@ -36,6 +34,10 @@ class TikTokBaseIE(InfoExtractor):
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
@staticmethod
def _create_url(user_id, video_id):
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
@ -361,7 +363,7 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
_VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'
_VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
@ -466,7 +468,7 @@ class TikTokIE(TikTokBaseIE):
'info_dict': {
'id': '7059698374567611694',
'ext': 'mp4',
'title': 'tiktok video #7059698374567611694',
'title': 'TikTok video #7059698374567611694',
'description': '',
'uploader': 'pokemonlife22',
'creator': 'Pokemon',
@ -490,6 +492,11 @@ class TikTokIE(TikTokBaseIE):
'only_matching': True
}]
@classmethod
def _extract_urls(cls, webpage):
return [mobj.group('url') for mobj in re.finditer(
rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})', webpage)]
def _extract_aweme_app(self, aweme_id):
try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
@ -506,7 +513,8 @@ class TikTokIE(TikTokBaseIE):
return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url):
video_id = self._match_id(url)
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
url = self._create_url(user_id, video_id)
try:
return self._extract_aweme_app(video_id)