yt-dlp/yt_dlp/extractor/jditemvideo.py

87 wiersze
4.0 KiB
Python
Czysty Wina Historia

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import json
import random
import time
from .common import InfoExtractor
from ..utils import determine_ext, ExtractorError, traverse_obj
class JdItemVideoIE(InfoExtractor):
    """Extract the main product video from a JD item page.

    The item page embeds a numeric ``mainVideoId``. That id is sent to the
    JSONP endpoint in ``_JD_API_VIDEO_CALLBACK_URL``, whose response carries
    the play URL and the metadata returned in the info dict.
    """

    # Dots are escaped so they only match literal dots; one or more
    # sub-domain labels are required in front of "jd.<tld>".
    _VALID_URL = r'https?://(?:[\w-]+\.)+jd\.[a-z.]{2,9}/(?P<id>\d{6,16})\.html'
    IE_NAME = 'jd-video'
    IE_DESC = 'jd-video extractor'
    # This extractor performs no login; value kept as in the original file.
    _NETRC_MACHINE = False
    # JSONP endpoint; `rand` and `timestamp` imitate the jQuery callback
    # name and cache-buster parameters.
    _JD_API_VIDEO_CALLBACK_URL = 'https://cd.jd.com/tencent/video_v3?callback=jQuery{rand}&vid={video_id}&type=1&from=1&appid=24&_={timestamp}'
    _TESTS = [
        {
            'url': 'https://npcitem.jd.hk/100030101538.html',
            'info_dict': {
                'id': '100030101538',
                'ext': 'mp4',
                'title': 'ipad 2021第九代',
                'description': '【AppleiPad】Apple苹果 iPad 第9代 10.2英寸平板电脑 2021款 ipad964GB WLAN版/A13芯片/1200万像素/iPadOS深空灰色【行情 报价 价格 评测】-京东',
                'filesize': 10251794,
                'width': 1280,
                'height': 1280,
                'duration': 56,
                'thumbnail': 'https://jvod.300hu.com/img/2022/130871763/1/img7.jpg',
                'url': 'https://jvod.300hu.com/vod/product/6e02e2d8-98bc-491d-80a1-448ae5ea1c38/c6ef7b9b14ef4b9ca7e4cebda5b7684c.mp4?source=2&h265=h265/18799/a797504bd6f947dfbf6fdb96acfbb55f.mp4',
            },
        },
        {
            # The id is taken from the URL, so the URL must carry the id this
            # test expects (it previously repeated the first test's URL).
            # NOTE(review): host assumed to be the same as in the first test
            # - confirm that this item is served from npcitem.jd.hk.
            'url': 'https://npcitem.jd.hk/100037516759.html',
            'info_dict': {
                'id': '100037516759',
                'ext': 'mp4',
                'title': 'RODE Wireless Go II Dual',
                'description': '【RODEWireless Go II Dual】罗德RODEWireless Go II Dual无线领夹麦克风单反手机无线小蜜蜂采访直播vlog收音 一拖二2代 标配【行情 报价 价格 评测】-京东',
                'filesize': 7547769,
                'width': 1280,
                'height': 720,
                'duration': 60,
                'thumbnail': 'https://jvod.300hu.com/img/2022/219535842/1/img7.jpg',
                'url': 'https://jvod.300hu.com/vod/product/1fc0661d-546e-446e-a429-a8db696ab06a/4067f4c3bb2d41c5af84081d2b0e3018.mp4?source=2&h265=h265/113074/cf365c28ca3a4fdb8178c4e44f916341.mp4',
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the main video of the item page at `url`.

        Raises:
            ExtractorError: if the page has no ``mainVideoId``, if the
                callback response cannot be unwrapped or reports a non-zero
                ``code``, or if it carries no ``playUrl``.
        """
        item_id = self._match_id(url)
        webpage = self._download_webpage(url, item_id)

        # default=None makes a miss return None instead of raising, so the
        # explicit check below is actually reachable.
        video_id = self._search_regex(
            r'"mainVideoId":"(\d+?)"', webpage, 'video id', default=None)
        if video_id is None:
            raise ExtractorError('No video found on %s' % url, expected=True)

        # The page <title> is used as the description; the video's own name
        # comes from the callback response.
        description = self._html_extract_title(webpage)

        # A separate name keeps the `url` argument intact.
        api_url = self._JD_API_VIDEO_CALLBACK_URL.format(
            rand=random.randint(433333, 999999),
            timestamp=int(time.time() * 1000),
            video_id=video_id)
        jsonp = self._download_webpage(api_url, item_id)

        # Plain _search_regex: the capture is JSON, so it must not be passed
        # through the HTML clean-up that _html_search_regex applies.
        payload = self._search_regex(
            r'(?s)jQuery\d+\((.+)\)', jsonp, 'video metadata', default=None)
        if payload is None:
            raise ExtractorError('Callback fail. return: %s' % jsonp)

        detail = self._parse_json(payload, item_id)
        if detail.get('code', -1) != 0:
            raise ExtractorError('Callback fail. return: %s' % payload)

        play_url = detail.get('playUrl')
        if not play_url:
            raise ExtractorError('Callback returned no playUrl: %s' % payload)

        return {
            'id': item_id,
            'ext': determine_ext(play_url),
            'title': traverse_obj(
                detail, ('extInfo', 'videoName'), default='unknown_video_title'),
            'description': description,
            # `filesize` is the standard info-dict key for the byte count.
            'filesize': traverse_obj(detail, ('extInfo', 'size')),
            'width': traverse_obj(detail, ('extInfo', 'vwidth')),
            'height': traverse_obj(detail, ('extInfo', 'vheight')),
            'duration': detail.get('duration'),
            'thumbnail': detail.get('imageUrl'),
            'url': play_url,
        }