package org.schabi.newpipe.extractor.services.youtube; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.Subtitles; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.IOException; import java.util.*; /* * Created by Christian Schabesberger on 06.08.15. * * Copyright (C) Christian Schabesberger 2015 * YoutubeStreamExtractor.java is part of NewPipe. * * NewPipe is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * NewPipe is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/ public class YoutubeStreamExtractor extends StreamExtractor { private static final String TAG = YoutubeStreamExtractor.class.getSimpleName(); /*////////////////////////////////////////////////////////////////////////// // Exceptions //////////////////////////////////////////////////////////////////////////*/ public class DecryptException extends ParsingException { DecryptException(String message, Throwable cause) { super(message, cause); } } public class GemaException extends ContentNotAvailableException { GemaException(String message) { super(message); } } public class SubtitlesException extends ContentNotAvailableException { SubtitlesException(String message, Throwable cause) { super(message, cause); } } /*//////////////////////////////////////////////////////////////////////////*/ private Document doc; @Nullable private JsonObject playerArgs; @Nonnull private final Map videoInfoPage = new HashMap<>(); @Nonnull private List subtitlesInfos = new ArrayList<>(); private boolean isAgeRestricted; public YoutubeStreamExtractor(StreamingService service, String url) { super(service, url); } /*////////////////////////////////////////////////////////////////////////// // Impl //////////////////////////////////////////////////////////////////////////*/ @Nonnull @Override public String getId() throws ParsingException { try { return getUrlIdHandler().getId(getCleanUrl()); } catch (Exception e) { throw new ParsingException("Could not get stream id"); } } @Nonnull @Override public String getName() throws ParsingException { assertPageFetched(); String name = getStringFromMetaData("title"); if(name == null) { // Fallback to HTML method try { name = doc.select("meta[name=title]").attr(CONTENT); } catch (Exception e) { throw new ParsingException("Could not get the title", e); } } if(name == null || name.isEmpty()) { throw new ParsingException("Could not get the title"); } return name; } @Nonnull @Override public String getUploadDate() throws ParsingException { 
assertPageFetched(); try { return doc.select("meta[itemprop=datePublished]").attr(CONTENT); } catch (Exception e) {//todo: add fallback method throw new ParsingException("Could not get upload date", e); } } @Nonnull @Override public String getThumbnailUrl() throws ParsingException { assertPageFetched(); // Try to get high resolution thumbnail first, if it fails, use low res from the player instead try { return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); } catch (Exception ignored) { // Try other method... } try { if (playerArgs != null && playerArgs.isString("thumbnail_url")) return playerArgs.getString("thumbnail_url"); } catch (Exception ignored) { // Try other method... } try { return videoInfoPage.get("thumbnail_url"); } catch (Exception e) { throw new ParsingException("Could not get thumbnail url", e); } } @Nonnull @Override public String getDescription() throws ParsingException { assertPageFetched(); try { return doc.select("p[id=\"eow-description\"]").first().html(); } catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know throw new ParsingException("Could not get the description", e); } } @Override public int getAgeLimit() throws ParsingException { assertPageFetched(); if (!isAgeRestricted) { return NO_AGE_LIMIT; } try { return Integer.valueOf(doc.select("meta[property=\"og:restrictions:age\"]") .attr(CONTENT).replace("+", "")); } catch (Exception e) { throw new ParsingException("Could not get age restriction"); } } @Override public long getLength() throws ParsingException { assertPageFetched(); if(playerArgs != null) { try { long returnValue = Long.parseLong(playerArgs.get("length_seconds") + ""); if (returnValue >= 0) return returnValue; } catch (Exception ignored) { // Try other method... } } String lengthString = videoInfoPage.get("length_seconds"); try { return Long.parseLong(lengthString); } catch (Exception ignored) { // Try other method... 
} // TODO: 25.11.17 Implement a way to get the length for age restricted videos #44 try { // Fallback to HTML method return Long.parseLong(doc.select("div[class~=\"ytp-progress-bar\"][role=\"slider\"]").first() .attr("aria-valuemax")); } catch (Exception e) { throw new ParsingException("Could not get video length", e); } } /** * Attempts to parse (and return) the offset to start playing the video from. * * @return the offset (in seconds), or 0 if no timestamp is found. */ @Override public long getTimeStamp() throws ParsingException { return getTimestampSeconds("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)"); } @Override public long getViewCount() throws ParsingException { assertPageFetched(); try { return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); } catch (Exception e) {//todo: find fallback method throw new ParsingException("Could not get number of views", e); } } @Override public long getLikeCount() throws ParsingException { assertPageFetched(); String likesString = ""; try { Element button = doc.select("button.like-button-renderer-like-button").first(); try { likesString = button.select("span.yt-uix-button-content").first().text(); } catch (NullPointerException e) { //if this kicks in our button has no content and therefore likes/dislikes are disabled return -1; } return Integer.parseInt(Utils.removeNonDigitCharacters(likesString)); } catch (NumberFormatException nfe) { throw new ParsingException("Could not parse \"" + likesString + "\" as an Integer", nfe); } catch (Exception e) { throw new ParsingException("Could not get like count", e); } } @Override public long getDislikeCount() throws ParsingException { assertPageFetched(); String dislikesString = ""; try { Element button = doc.select("button.like-button-renderer-dislike-button").first(); try { dislikesString = button.select("span.yt-uix-button-content").first().text(); } catch (NullPointerException e) { //if this kicks in our button has no content and therefore 
likes/dislikes are disabled return -1; } return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString)); } catch (NumberFormatException nfe) { throw new ParsingException("Could not parse \"" + dislikesString + "\" as an Integer", nfe); } catch (Exception e) { throw new ParsingException("Could not get dislike count", e); } } @Nonnull @Override public String getUploaderUrl() throws ParsingException { assertPageFetched(); try { return doc.select("div[class=\"yt-user-info\"]").first().children() .select("a").first().attr("abs:href"); } catch (Exception e) { throw new ParsingException("Could not get channel link", e); } } @Nullable private String getStringFromMetaData(String field) { assertPageFetched(); String value = null; if(playerArgs != null) { // This can not fail value = playerArgs.getString(field); } if(value == null) { // This can not fail too value = videoInfoPage.get(field); } return value; } @Nonnull @Override public String getUploaderName() throws ParsingException { assertPageFetched(); String name = getStringFromMetaData("author"); if(name == null) { try { // Fallback to HTML method name = doc.select("div.yt-user-info").first().text(); } catch (Exception e) { throw new ParsingException("Could not get uploader name", e); } } if(name == null || name.isEmpty()) { throw new ParsingException("Could not get uploader name"); } return name; } @Nonnull @Override public String getUploaderAvatarUrl() throws ParsingException { assertPageFetched(); try { return doc.select("a[class*=\"yt-user-photo\"]").first() .select("img").first() .attr("abs:data-thumb"); } catch (Exception e) {//todo: add fallback method throw new ParsingException("Could not get uploader thumbnail URL.", e); } } @Nonnull @Override public String getDashMpdUrl() throws ParsingException { assertPageFetched(); try { String dashManifestUrl; if (videoInfoPage.containsKey("dashmpd")) { dashManifestUrl = videoInfoPage.get("dashmpd"); } else if (playerArgs != null && playerArgs.isString("dashmpd")) 
{ dashManifestUrl = playerArgs.getString("dashmpd", ""); } else { return ""; } if (!dashManifestUrl.contains("/signature/")) { String encryptedSig = Parser.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifestUrl); String decryptedSig; decryptedSig = decryptSignature(encryptedSig, decryptionCode); dashManifestUrl = dashManifestUrl.replace("/s/" + encryptedSig, "/signature/" + decryptedSig); } return dashManifestUrl; } catch (Exception e) { throw new ParsingException("Could not get dash manifest url", e); } } @Nonnull @Override public String getHlsUrl() throws ParsingException { assertPageFetched(); try { String hlsvp; if (playerArgs != null && playerArgs.isString("hlsvp")) { hlsvp = playerArgs.getString("hlsvp", ""); } else { return ""; } return hlsvp; } catch (Exception e) { throw new ParsingException("Could not get hls manifest url", e); } } @Override public List getAudioStreams() throws IOException, ExtractionException { assertPageFetched(); List audioStreams = new ArrayList<>(); try { for (Map.Entry entry : getItags(ADAPTIVE_FMTS, ItagItem.ItagType.AUDIO).entrySet()) { ItagItem itag = entry.getValue(); AudioStream audioStream = new AudioStream(entry.getKey(), itag.getMediaFormat(), itag.avgBitrate); if (!Stream.containSimilarStream(audioStream, audioStreams)) { audioStreams.add(audioStream); } } } catch (Exception e) { throw new ParsingException("Could not get audio streams", e); } return audioStreams; } @Override public List getVideoStreams() throws IOException, ExtractionException { assertPageFetched(); List videoStreams = new ArrayList<>(); try { for (Map.Entry entry : getItags(URL_ENCODED_FMT_STREAM_MAP, ItagItem.ItagType.VIDEO).entrySet()) { ItagItem itag = entry.getValue(); VideoStream videoStream = new VideoStream(entry.getKey(), itag.getMediaFormat(), itag.resolutionString); if (!Stream.containSimilarStream(videoStream, videoStreams)) { videoStreams.add(videoStream); } } } catch (Exception e) { throw new ParsingException("Could not get video streams", e); } 
return videoStreams; } @Override public List getVideoOnlyStreams() throws IOException, ExtractionException { assertPageFetched(); List videoOnlyStreams = new ArrayList<>(); try { for (Map.Entry entry : getItags(ADAPTIVE_FMTS, ItagItem.ItagType.VIDEO_ONLY).entrySet()) { ItagItem itag = entry.getValue(); VideoStream videoStream = new VideoStream(entry.getKey(), itag.getMediaFormat(), itag.resolutionString, true); if (!Stream.containSimilarStream(videoStream, videoOnlyStreams)) { videoOnlyStreams.add(videoStream); } } } catch (Exception e) { throw new ParsingException("Could not get video only streams", e); } return videoOnlyStreams; } @Override @Nonnull public List getSubtitlesDefault() throws IOException, ExtractionException { return getSubtitles(SubtitlesFormat.TTML); } @Override @Nonnull public List getSubtitles(final SubtitlesFormat format) throws IOException, ExtractionException { assertPageFetched(); List subtitles = new ArrayList<>(); for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) { subtitles.add(subtitlesInfo.getSubtitle(format)); } return subtitles; } @Override public StreamType getStreamType() throws ParsingException { assertPageFetched(); try { if (playerArgs != null && (playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live") || playerArgs.get(URL_ENCODED_FMT_STREAM_MAP).toString().isEmpty())) { return StreamType.LIVE_STREAM; } } catch (Exception e) { throw new ParsingException("Could not get hls manifest url", e); } return StreamType.VIDEO_STREAM; } @Override public StreamInfoItem getNextVideo() throws IOException, ExtractionException { assertPageFetched(); try { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); collector.commit(extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]") .first().select("li").first())); return collector.getItems().get(0); } catch (Exception e) { throw new ParsingException("Could not get next video", e); } } @Override public StreamInfoItemsCollector 
getRelatedVideos() throws IOException, ExtractionException { assertPageFetched(); try { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); Element ul = doc.select("ul[id=\"watch-related\"]").first(); if (ul != null) { for (Element li : ul.children()) { // first check if we have a playlist. If so leave them out if (li.select("a[class*=\"content-link\"]").first() != null) { collector.commit(extractVideoPreviewInfo(li)); } } } return collector; } catch (Exception e) { throw new ParsingException("Could not get related videos", e); } } /** * {@inheritDoc} */ @Override public String getErrorMessage() { String errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text(); StringBuilder errorReason; if (errorMessage == null || errorMessage.isEmpty()) { errorReason = null; } else if (errorMessage.contains("GEMA")) { // Gema sometimes blocks youtube music content in germany: // https://www.gema.de/en/ // Detailed description: // https://en.wikipedia.org/wiki/GEMA_%28German_organization%29 errorReason = new StringBuilder("GEMA"); } else { errorReason = new StringBuilder(errorMessage); errorReason.append(" "); errorReason.append(doc.select("[id=\"unavailable-submessage\"]").first().text()); } return errorReason != null ? 
errorReason.toString() : null; } /*////////////////////////////////////////////////////////////////////////// // Fetch page //////////////////////////////////////////////////////////////////////////*/ private static final String URL_ENCODED_FMT_STREAM_MAP = "url_encoded_fmt_stream_map"; private static final String ADAPTIVE_FMTS = "adaptive_fmts"; private static final String HTTPS = "https:"; private static final String CONTENT = "content"; private static final String DECRYPTION_FUNC_NAME = "decrypt"; private static final String VERIFIED_URL_PARAMS = "&has_verified=1&bpctr=9999999999"; private volatile String decryptionCode = ""; private String pageHtml = null; private String getPageHtml(Downloader downloader) throws IOException, ExtractionException { final String verifiedUrl = getCleanUrl() + VERIFIED_URL_PARAMS; if (pageHtml == null) { pageHtml = downloader.download(verifiedUrl); } return pageHtml; } @Override public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { final String pageContent = getPageHtml(downloader); doc = Jsoup.parse(pageContent, getCleanUrl()); final String playerUrl; // Check if the video is age restricted if (pageContent.contains(" getAvailableSubtitlesInfo() throws SubtitlesException { // If the video is age restricted getPlayerConfig will fail if(isAgeRestricted) return Collections.emptyList(); final JsonObject playerConfig; try { playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader())); } catch (IOException | ExtractionException e) { throw new SubtitlesException("Unable to download player configs", e); } final String playerResponse = playerConfig.getObject("args", new JsonObject()) .getString("player_response"); final JsonObject captions; try { if (playerResponse == null || !JsonParser.object().from(playerResponse).has("captions")) { // Captions does not exist return Collections.emptyList(); } captions = JsonParser.object().from(playerResponse).getObject("captions"); } catch 
(JsonParserException e) { throw new SubtitlesException("Unable to parse subtitles listing", e); } final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject()); final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray()); // todo: use this to apply auto translation to different language from a source language final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray()); // This check is necessary since there may be cases where subtitles metadata do not contain caption track info // e.g. https://www.youtube.com/watch?v=-Vpwatutnko final int captionsSize = captionsArray.size(); if(captionsSize == 0) return Collections.emptyList(); List result = new ArrayList<>(); for (int i = 0; i < captionsSize; i++) { final String languageCode = captionsArray.getObject(i).getString("languageCode"); final String baseUrl = captionsArray.getObject(i).getString("baseUrl"); final String vssId = captionsArray.getObject(i).getString("vssId"); if (languageCode != null && baseUrl != null && vssId != null) { final boolean isAutoGenerated = vssId.startsWith("a."); result.add(new SubtitlesInfo(baseUrl, languageCode, isAutoGenerated)); } } return result; } /*////////////////////////////////////////////////////////////////////////// // Data Class //////////////////////////////////////////////////////////////////////////*/ private class EmbeddedInfo { final String url; final String sts; EmbeddedInfo(final String url, final String sts) { this.url = url; this.sts = sts; } } private class SubtitlesInfo { final String cleanUrl; final String languageCode; final boolean isGenerated; final Locale locale; public SubtitlesInfo(final String baseUrl, final String languageCode, final boolean isGenerated) { this.cleanUrl = baseUrl .replaceAll("&fmt=[^&]*", "") // Remove preexisting format if exists .replaceAll("&tlang=[^&]*", ""); // Remove translation language this.languageCode = languageCode; this.isGenerated = 
isGenerated; final String[] splits = languageCode.split("-"); this.locale = splits.length == 2 ? new Locale(splits[0], splits[1]) : new Locale(languageCode); } public Subtitles getSubtitle(final SubtitlesFormat format) { return new Subtitles(format, locale, cleanUrl + "&fmt=" + format.getExtension(), isGenerated); } } /*////////////////////////////////////////////////////////////////////////// // Utils //////////////////////////////////////////////////////////////////////////*/ @Nonnull private static String getVideoInfoUrl(final String id, final String sts) { return "https://www.youtube.com/get_video_info?" + "video_id=" + id + "&eurl=https://youtube.googleapis.com/v/" + id + "&sts=" + sts + "&ps=default&gl=US&hl=en"; } private Map getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException { Map urlAndItags = new LinkedHashMap<>(); String encodedUrlMap = ""; if (playerArgs != null && playerArgs.isString(encodedUrlMapKey)) { encodedUrlMap = playerArgs.getString(encodedUrlMapKey, ""); } else if (videoInfoPage.containsKey(encodedUrlMapKey)) { encodedUrlMap = videoInfoPage.get(encodedUrlMapKey); } for (String url_data_str : encodedUrlMap.split(",")) { try { // This loop iterates through multiple streams, therefore tags // is related to one and the same stream at a time. 
Map tags = Parser.compatParseMap( org.jsoup.parser.Parser.unescapeEntities(url_data_str, true)); int itag = Integer.parseInt(tags.get("itag")); if (ItagItem.isSupported(itag)) { ItagItem itagItem = ItagItem.getItag(itag); if (itagItem.itagType == itagTypeWanted) { String streamUrl = tags.get("url"); // if video has a signature: decrypt it and add it to the url if (tags.get("s") != null) { streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode); } urlAndItags.put(streamUrl, itagItem); } } } catch (DecryptException e) { throw e; } catch (Exception ignored) { } } return urlAndItags; } /** * Provides information about links to other videos on the video page, such as related videos. * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo. */ private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) { return new YoutubeStreamInfoItemExtractor(li) { @Override public String getUrl() throws ParsingException { return li.select("a.content-link").first().attr("abs:href"); } @Override public String getName() throws ParsingException { //todo: check NullPointerException causing return li.select("span.title").first().text(); //this page causes the NullPointerException, after finding it by searching for "tjvg": //https://www.youtube.com/watch?v=Uqg0aEhLFAg } @Override public String getUploaderName() throws ParsingException { return li.select("span[class*=\"attribution\"").first() .select("span").first().text(); } @Override public String getUploaderUrl() throws ParsingException { return ""; // The uploader is not linked } @Override public String getUploadDate() throws ParsingException { return ""; } @Override public long getViewCount() throws ParsingException { try { if (getStreamType() == StreamType.LIVE_STREAM) return -1; return Long.parseLong(Utils.removeNonDigitCharacters( li.select("span.view-count").first().text())); } catch (Exception e) { //related videos sometimes have no view 
count return 0; } } @Override public String getThumbnailUrl() throws ParsingException { Element img = li.select("img").first(); String thumbnailUrl = img.attr("abs:src"); // Sometimes youtube sends links to gif files which somehow seem to not exist // anymore. Items with such gif also offer a secondary image source. So we are going // to use that if we caught such an item. if (thumbnailUrl.contains(".gif")) { thumbnailUrl = img.attr("data-thumb"); } if (thumbnailUrl.startsWith("//")) { thumbnailUrl = HTTPS + thumbnailUrl; } return thumbnailUrl; } }; } }