[YouTube] Support new A/B tested comments data

Also improve current comments code by removing outdated comment
renderer data.
pull/1163/head
AudricV 2024-03-17 15:08:58 +01:00
rodzic e5b30ae8c3
commit 293c3e9e47
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: DA92EC7905614198
3 zmienionych plików z 377 dodań i 71 usunięć

Wyświetl plik

@ -0,0 +1,235 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.List;
import java.util.Objects;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getAttributedDescription;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getImagesFromThumbnailsArray;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
/**
* A {@link CommentsInfoItemExtractor} for YouTube comment data returned in a view model and entity
* updates.
*/
class YoutubeCommentsEUVMInfoItemExtractor implements CommentsInfoItemExtractor {
private static final String AUTHOR = "author";
private static final String PROPERTIES = "properties";
@Nonnull
private final JsonObject commentViewModel;
@Nullable
private final JsonObject commentRepliesRenderer;
@Nonnull
private final JsonObject commentEntityPayload;
@Nonnull
private final JsonObject engagementToolbarStateEntityPayload;
@Nonnull
private final String videoUrl;
@Nonnull
private final TimeAgoParser timeAgoParser;
YoutubeCommentsEUVMInfoItemExtractor(
@Nonnull final JsonObject commentViewModel,
@Nullable final JsonObject commentRepliesRenderer,
@Nonnull final JsonObject commentEntityPayload,
@Nonnull final JsonObject engagementToolbarStateEntityPayload,
@Nonnull final String videoUrl,
@Nonnull final TimeAgoParser timeAgoParser) {
this.commentViewModel = commentViewModel;
this.commentRepliesRenderer = commentRepliesRenderer;
this.commentEntityPayload = commentEntityPayload;
this.engagementToolbarStateEntityPayload = engagementToolbarStateEntityPayload;
this.videoUrl = videoUrl;
this.timeAgoParser = timeAgoParser;
}
@Override
public String getName() throws ParsingException {
return getUploaderName();
}
@Override
public String getUrl() throws ParsingException {
return videoUrl;
}
@Nonnull
@Override
public List<Image> getThumbnails() throws ParsingException {
return getUploaderAvatars();
}
@Override
public int getLikeCount() throws ParsingException {
final String textualLikeCount = getTextualLikeCount();
try {
if (Utils.isBlank(textualLikeCount)) {
return 0;
}
return (int) Utils.mixedNumberWordToLong(textualLikeCount);
} catch (final Exception e) {
throw new ParsingException(
"Unexpected error while converting textual like count to like count", e);
}
}
@Override
public String getTextualLikeCount() {
return commentEntityPayload.getObject("toolbar")
.getString("likeCountNotliked");
}
@Override
public Description getCommentText() throws ParsingException {
// Comments' text work in the same way as an attributed video description
return new Description(
getAttributedDescription(commentEntityPayload.getObject(PROPERTIES)
.getObject("content")), Description.HTML);
}
@Override
public String getTextualUploadDate() throws ParsingException {
return commentEntityPayload.getObject(PROPERTIES)
.getString("publishedTime");
}
@Nullable
@Override
public DateWrapper getUploadDate() throws ParsingException {
final String textualPublishedTime = getTextualUploadDate();
if (isNullOrEmpty(textualPublishedTime)) {
return null;
}
return timeAgoParser.parse(textualPublishedTime);
}
@Override
public String getCommentId() throws ParsingException {
String commentId = commentEntityPayload.getObject(PROPERTIES)
.getString("commentId");
if (isNullOrEmpty(commentId)) {
commentId = commentViewModel.getString("commentId");
if (isNullOrEmpty(commentId)) {
throw new ParsingException("Could not get comment ID");
}
}
return commentId;
}
@Override
public String getUploaderUrl() throws ParsingException {
final JsonObject author = commentEntityPayload.getObject(AUTHOR);
String channelId = author.getString("channelId");
if (isNullOrEmpty(channelId)) {
channelId = author.getObject("channelCommand")
.getObject("innertubeCommand")
.getObject("browseEndpoint")
.getString("browseId");
if (isNullOrEmpty(channelId)) {
channelId = author.getObject("avatar")
.getObject("endpoint")
.getObject("innertubeCommand")
.getObject("browseEndpoint")
.getString("browseId");
if (isNullOrEmpty(channelId)) {
throw new ParsingException("Could not get channel ID");
}
}
}
return "https://www.youtube.com/channel/" + channelId;
}
@Override
public String getUploaderName() throws ParsingException {
return commentEntityPayload.getObject(AUTHOR)
.getString("displayName");
}
@Nonnull
@Override
public List<Image> getUploaderAvatars() throws ParsingException {
return getImagesFromThumbnailsArray(commentEntityPayload.getObject("avatar")
.getObject("image")
.getArray("sources"));
}
@Override
public boolean isHeartedByUploader() {
return "TOOLBAR_HEART_STATE_HEARTED".equals(
engagementToolbarStateEntityPayload.getString("heartState"));
}
@Override
public boolean isPinned() {
return commentViewModel.has("pinnedText");
}
@Override
public boolean isUploaderVerified() throws ParsingException {
final JsonObject author = commentEntityPayload.getObject(AUTHOR);
return author.getBoolean("isVerified") || author.getBoolean("isArtist");
}
@Override
public int getReplyCount() throws ParsingException {
// As YouTube allows replies up to 750 comments, we cannot check if the count returned is a
// mixed number or a real number
// Assume it is a mixed one, as it matches how numbers of most properties are returned
final String replyCountString = commentEntityPayload.getObject("toolbar")
.getString("replyCount");
if (isNullOrEmpty(replyCountString)) {
return 0;
}
return (int) Utils.mixedNumberWordToLong(replyCountString);
}
@Nullable
@Override
public Page getReplies() throws ParsingException {
if (isNullOrEmpty(commentRepliesRenderer)) {
return null;
}
final String continuation = commentRepliesRenderer.getArray("contents")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.map(content -> content.getObject("continuationItemRenderer", null))
.filter(Objects::nonNull)
.findFirst()
.map(continuationItemRenderer ->
continuationItemRenderer.getObject("continuationEndpoint")
.getObject("continuationCommand")
.getString("token"))
.orElseThrow(() ->
new ParsingException("Could not get comment replies continuation"));
return new Page(videoUrl, continuation);
}
@Override
public boolean isChannelOwner() {
return commentEntityPayload.getObject(AUTHOR)
.getBoolean("isCreator");
}
@Override
public boolean hasCreatorReply() {
return commentRepliesRenderer != null
&& commentRepliesRenderer.has("viewRepliesCreatorThumbnail");
}
}

Wyświetl plik

@ -13,6 +13,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;
@ -21,7 +22,6 @@ import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
@ -30,6 +30,9 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
public class YoutubeCommentsExtractor extends CommentsExtractor {
private static final String COMMENT_VIEW_MODEL_KEY = "commentViewModel";
private static final String COMMENT_RENDERER_KEY = "commentRenderer";
/**
* Whether comments are disabled on video.
*/
@ -74,8 +77,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return null;
}
final String token = contents
.stream()
final String token = contents.stream()
// Only use JsonObjects
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
@ -120,6 +122,21 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
}
}
/**
 * Finds the mutation whose {@code entityKey} equals the given key and returns its
 * {@code payload} object.
 *
 * @param mutations  the mutations array to search in; elements which are not
 *                   {@link JsonObject}s are ignored
 * @param commentKey the entity key to look for
 * @return the {@code payload} object of the first matching mutation
 * @throws ParsingException if no mutation has the given entity key
 */
@Nonnull
private JsonObject getMutationPayloadFromEntityKey(@Nonnull final JsonArray mutations,
                                                   @Nonnull final String commentKey)
        throws ParsingException {
    for (final Object candidate : mutations) {
        if (!(candidate instanceof JsonObject)) {
            continue;
        }

        final JsonObject mutation = (JsonObject) candidate;
        if (commentKey.equals(mutation.getString("entityKey"))) {
            // Only the first match is used
            return mutation.getObject("payload");
        }
    }

    throw new ParsingException("Could not get comment entity payload mutation");
}
@Nonnull
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
@ -207,8 +224,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
}
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
final JsonObject jsonObject)
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject jsonObject)
throws ParsingException {
final JsonArray onResponseReceivedEndpoints =
@ -233,6 +250,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
final JsonArray contents;
try {
// A copy of the array is needed, otherwise the continuation item is removed from the
// original object which is used to get the continuation
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
} catch (final Exception e) {
// No comments
@ -244,23 +263,80 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
contents.remove(index);
}
final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
? "commentThreadRenderer"
: "commentRenderer";
// The mutations object, which is returned in the comments' continuation
// It contains parts of comment data when comments are returned with a view model
final JsonArray mutations = jsonObject.getObject("frameworkUpdates")
.getObject("entityBatchUpdate")
.getArray("mutations");
final String videoUrl = getUrl();
final TimeAgoParser timeAgoParser = getTimeAgoParser();
final List<Object> comments;
try {
comments = JsonUtils.getValues(contents, jsonKey);
} catch (final Exception e) {
throw new ParsingException("Unable to get parse youtube comments", e);
for (final Object o : contents) {
if (!(o instanceof JsonObject)) {
continue;
}
collectCommentItem(mutations, (JsonObject) o, collector, videoUrl, timeAgoParser);
}
}
final String url = getUrl();
comments.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
.forEach(collector::commit);
/**
 * Commits to the collector the extractor matching the shape of the given content item.
 *
 * <p>
 * Four shapes are handled: a comment thread renderer holding either a comment view model or a
 * comment renderer, a bare comment view model and a bare comment renderer. Any other content is
 * ignored.
 * </p>
 *
 * @param mutations     the mutations array in which entity payloads of view model comments are
 *                      looked up
 * @param content       the content item to inspect
 * @param collector     the collector to which the extractor is committed
 * @param videoUrl      the video URL passed to the extractor
 * @param timeAgoParser the time ago parser passed to the extractor
 * @throws ParsingException if an entity payload of a view model comment could not be found in
 *                          the mutations
 */
private void collectCommentItem(@Nonnull final JsonArray mutations,
                                @Nonnull final JsonObject content,
                                @Nonnull final CommentsInfoItemsCollector collector,
                                @Nonnull final String videoUrl,
                                @Nonnull final TimeAgoParser timeAgoParser)
        throws ParsingException {
    if (content.has("commentThreadRenderer")) {
        final JsonObject thread = content.getObject("commentThreadRenderer");

        if (thread.has(COMMENT_VIEW_MODEL_KEY)) {
            // In a thread, the view model is nested twice under the same key
            final JsonObject viewModel = thread.getObject(COMMENT_VIEW_MODEL_KEY)
                    .getObject(COMMENT_VIEW_MODEL_KEY);
            final JsonObject repliesRenderer = thread.getObject("replies")
                    .getObject("commentRepliesRenderer");
            collector.commit(createViewModelExtractor(
                    mutations, viewModel, repliesRenderer, videoUrl, timeAgoParser));
        } else if (thread.has("comment")) {
            final JsonObject renderer = thread.getObject("comment")
                    .getObject(COMMENT_RENDERER_KEY);
            final JsonObject repliesRenderer = thread.getObject("replies")
                    .getObject("commentRepliesRenderer");
            collector.commit(new YoutubeCommentsInfoItemExtractor(
                    renderer, repliesRenderer, videoUrl, timeAgoParser));
        }
        return;
    }

    if (content.has(COMMENT_VIEW_MODEL_KEY)) {
        // A bare view model comes without a replies renderer
        collector.commit(createViewModelExtractor(
                mutations,
                content.getObject(COMMENT_VIEW_MODEL_KEY),
                null,
                videoUrl,
                timeAgoParser));
        return;
    }

    if (content.has(COMMENT_RENDERER_KEY)) {
        // commentRenderers are directly returned for comment replies, so there is no
        // commentRepliesRenderer to provide
        // Also, YouTube has only one comment reply level
        collector.commit(new YoutubeCommentsInfoItemExtractor(
                content.getObject(COMMENT_RENDERER_KEY),
                null,
                videoUrl,
                timeAgoParser));
    }
}

/**
 * Creates the extractor of a comment returned as a view model, resolving its comment entity
 * payload and its engagement toolbar state entity payload from the mutations.
 *
 * @param mutations       the mutations array in which the entity payloads are looked up
 * @param viewModel       the comment view model, providing the {@code commentKey} and
 *                        {@code toolbarStateKey} entity keys
 * @param repliesRenderer the comment replies renderer, or {@code null} if there is none
 * @param videoUrl        the video URL passed to the extractor
 * @param timeAgoParser   the time ago parser passed to the extractor
 * @return the extractor for the given view model comment
 * @throws ParsingException if one of the two entity keys has no matching mutation
 */
@Nonnull
private YoutubeCommentsEUVMInfoItemExtractor createViewModelExtractor(
        @Nonnull final JsonArray mutations,
        @Nonnull final JsonObject viewModel,
        @Nullable final JsonObject repliesRenderer,
        @Nonnull final String videoUrl,
        @Nonnull final TimeAgoParser timeAgoParser) throws ParsingException {
    // A missing key falls back to an empty string, which is then looked up like any other key
    final JsonObject commentEntityPayload = getMutationPayloadFromEntityKey(
            mutations, viewModel.getString("commentKey", ""))
            .getObject("commentEntityPayload");
    final JsonObject toolbarStateEntityPayload = getMutationPayloadFromEntityKey(
            mutations, viewModel.getString("toolbarStateKey", ""))
            .getObject("engagementToolbarStateEntityPayload");

    return new YoutubeCommentsEUVMInfoItemExtractor(
            viewModel,
            repliesRenderer,
            commentEntityPayload,
            toolbarStateEntityPayload,
            videoUrl,
            timeAgoParser);
}
@Override
@ -307,10 +383,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return -1;
}
final JsonObject countText = ajaxJson
.getArray("onResponseReceivedEndpoints").getObject(0)
final JsonObject countText = ajaxJson.getArray("onResponseReceivedEndpoints")
.getObject(0)
.getObject("reloadContinuationItemsCommand")
.getArray("continuationItems").getObject(0)
.getArray("continuationItems")
.getObject(0)
.getObject("commentsHeaderRenderer")
.getObject("countText");

Wyświetl plik

@ -22,40 +22,36 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final JsonObject json;
private JsonObject commentRenderer;
@Nonnull
private final JsonObject commentRenderer;
@Nullable
private final JsonObject commentRepliesRenderer;
@Nonnull
private final String url;
@Nonnull
private final TimeAgoParser timeAgoParser;
public YoutubeCommentsInfoItemExtractor(final JsonObject json,
final String url,
final TimeAgoParser timeAgoParser) {
this.json = json;
public YoutubeCommentsInfoItemExtractor(@Nonnull final JsonObject commentRenderer,
@Nullable final JsonObject commentRepliesRenderer,
@Nonnull final String url,
@Nonnull final TimeAgoParser timeAgoParser) {
this.commentRenderer = commentRenderer;
this.commentRepliesRenderer = commentRepliesRenderer;
this.url = url;
this.timeAgoParser = timeAgoParser;
}
private JsonObject getCommentRenderer() throws ParsingException {
if (commentRenderer == null) {
if (json.has("comment")) {
commentRenderer = JsonUtils.getObject(json, "comment.commentRenderer");
} else {
commentRenderer = json;
}
}
return commentRenderer;
}
@Nonnull
private List<Image> getAuthorThumbnails() throws ParsingException {
try {
return getImagesFromThumbnailsArray(JsonUtils.getArray(getCommentRenderer(),
return getImagesFromThumbnailsArray(JsonUtils.getArray(commentRenderer,
"authorThumbnail.thumbnails"));
} catch (final Exception e) {
throw new ParsingException("Could not get author thumbnails", e);
}
}
@Nonnull
@Override
public String getUrl() throws ParsingException {
return url;
@ -70,7 +66,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getName() throws ParsingException {
try {
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
return getTextFromObject(JsonUtils.getObject(commentRenderer, "authorText"));
} catch (final Exception e) {
return "";
}
@ -79,7 +75,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getTextualUploadDate() throws ParsingException {
try {
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(),
return getTextFromObject(JsonUtils.getObject(commentRenderer,
"publishedTimeText"));
} catch (final Exception e) {
throw new ParsingException("Could not get publishedTimeText", e);
@ -90,8 +86,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public DateWrapper getUploadDate() throws ParsingException {
final String textualPublishedTime = getTextualUploadDate();
if (timeAgoParser != null && textualPublishedTime != null
&& !textualPublishedTime.isEmpty()) {
if (textualPublishedTime != null && !textualPublishedTime.isEmpty()) {
return timeAgoParser.parse(textualPublishedTime);
} else {
return null;
@ -118,7 +113,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
// Try first to get the exact like count by using the accessibility data
final String likeCount;
try {
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(commentRenderer,
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer"
+ ".accessibilityData.accessibilityData.label"));
} catch (final Exception e) {
@ -170,11 +165,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
*/
try {
// If a comment has no likes voteCount is not set
if (!getCommentRenderer().has("voteCount")) {
if (!commentRenderer.has("voteCount")) {
return "";
}
final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
final JsonObject voteCountObj = JsonUtils.getObject(commentRenderer, "voteCount");
if (voteCountObj.isEmpty()) {
return "";
}
@ -188,7 +183,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public Description getCommentText() throws ParsingException {
try {
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
final JsonObject contentText = JsonUtils.getObject(commentRenderer, "contentText");
if (contentText.isEmpty()) {
// completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
@ -208,7 +203,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getCommentId() throws ParsingException {
try {
return JsonUtils.getString(getCommentRenderer(), "commentId");
return JsonUtils.getString(commentRenderer, "commentId");
} catch (final Exception e) {
throw new ParsingException("Could not get comment id", e);
}
@ -221,27 +216,26 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
}
@Override
public boolean isHeartedByUploader() throws ParsingException {
final JsonObject commentActionButtonsRenderer = getCommentRenderer()
.getObject("actionButtons")
public boolean isHeartedByUploader() {
final JsonObject commentActionButtonsRenderer = commentRenderer.getObject("actionButtons")
.getObject("commentActionButtonsRenderer");
return commentActionButtonsRenderer.has("creatorHeart");
}
@Override
public boolean isPinned() throws ParsingException {
return getCommentRenderer().has("pinnedCommentBadge");
public boolean isPinned() {
return commentRenderer.has("pinnedCommentBadge");
}
@Override
public boolean isUploaderVerified() throws ParsingException {
return getCommentRenderer().has("authorCommentBadge");
return commentRenderer.has("authorCommentBadge");
}
@Override
public String getUploaderName() throws ParsingException {
try {
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
return getTextFromObject(JsonUtils.getObject(commentRenderer, "authorText"));
} catch (final Exception e) {
return "";
}
@ -250,7 +244,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getUploaderUrl() throws ParsingException {
try {
return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
return "https://www.youtube.com/channel/" + JsonUtils.getString(commentRenderer,
"authorEndpoint.browseEndpoint.browseId");
} catch (final Exception e) {
return "";
@ -258,19 +252,22 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
}
@Override
public int getReplyCount() throws ParsingException {
final JsonObject commentRendererJsonObject = getCommentRenderer();
if (commentRendererJsonObject.has("replyCount")) {
return commentRendererJsonObject.getInt("replyCount");
public int getReplyCount() {
if (commentRenderer.has("replyCount")) {
return commentRenderer.getInt("replyCount");
}
return UNKNOWN_REPLY_COUNT;
}
@Override
public Page getReplies() {
if (commentRepliesRenderer == null) {
return null;
}
try {
final String id = JsonUtils.getString(
JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents")
JsonUtils.getArray(commentRepliesRenderer, "contents")
.getObject(0),
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
return new Page(url, id);
@ -280,20 +277,17 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
}
@Override
public boolean isChannelOwner() throws ParsingException {
return getCommentRenderer().getBoolean("authorIsChannelOwner");
public boolean isChannelOwner() {
return commentRenderer.getBoolean("authorIsChannelOwner");
}
@Override
public boolean hasCreatorReply() throws ParsingException {
try {
final JsonObject commentRepliesRenderer = JsonUtils.getObject(json,
"replies.commentRepliesRenderer");
return commentRepliesRenderer.has("viewRepliesCreatorThumbnail");
} catch (final Exception e) {
public boolean hasCreatorReply() {
if (commentRepliesRenderer == null) {
return false;
}
return commentRepliesRenderer.has("viewRepliesCreatorThumbnail");
}
}