kopia lustrzana https://github.com/TeamNewPipe/NewPipeExtractor
Try to fix detecting replies to a comment on the previous page
When getting a page which is not the initial page, it is possible that the first comments are replies to a comment from a previous page. pull/993/head
rodzic
b6e3015ee2
commit
e5be686b06
|
@ -13,7 +13,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|||
* <br>
|
||||
* A page has an {@link #id}, an {@link #url}, as well as information on possible {@link #cookies}.
|
||||
* In case the data behind the URL has already been retrieved,
|
||||
* it can be accessed by using @link #getBody()} and {@link #getContent()}.
|
||||
* it can be accessed by using {@link #getBody()} or {@link #getContent()}.
|
||||
*/
|
||||
public class Page implements Serializable {
|
||||
private final String url;
|
||||
|
|
|
@ -21,13 +21,24 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
|||
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
public class SoundcloudCommentsExtractor extends CommentsExtractor {
|
||||
public static final String COLLECTION = "collection";
|
||||
public static final String NEXT_HREF = "next_href";
|
||||
|
||||
/**
|
||||
* The last comment which was a top level comment.
|
||||
* Next pages might start with replies to the last top level comment
|
||||
* and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
|
||||
* of the last top level comment cannot be determined with certainty.
|
||||
*/
|
||||
@Nullable private JsonObject lastTopLevelComment;
|
||||
|
||||
public SoundcloudCommentsExtractor(final StreamingService service,
|
||||
final ListLinkHandler uiHandler) {
|
||||
super(service, uiHandler);
|
||||
|
@ -50,14 +61,15 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
|
|||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
||||
getServiceId());
|
||||
|
||||
collectCommentsFrom(collector, json);
|
||||
collectCommentsFrom(collector, json, null);
|
||||
|
||||
return new InfoItemsPage<>(collector, new Page(json.getString(NEXT_HREF)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws ExtractionException,
|
||||
IOException {
|
||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
||||
throws ExtractionException, IOException {
|
||||
|
||||
if (page == null || isNullOrEmpty(page.getUrl())) {
|
||||
throw new IllegalArgumentException("Page doesn't contain an URL");
|
||||
}
|
||||
|
@ -88,7 +100,7 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
|
|||
} catch (final JsonParserException e) {
|
||||
throw new ParsingException("Could not parse json", e);
|
||||
}
|
||||
collectCommentsFrom(collector, json);
|
||||
collectCommentsFrom(collector, json, lastTopLevelComment);
|
||||
}
|
||||
|
||||
if (hasNextPage) {
|
||||
|
@ -101,27 +113,86 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
|
|||
@Override
|
||||
public void onFetchPage(@Nonnull final Downloader downloader) { }
|
||||
|
||||
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
||||
final JsonObject json) throws ParsingException {
|
||||
/**
|
||||
* Collect top level comments from a SoundCloud API response.
|
||||
* @param collector the collector which collects the top level comments
|
||||
* @param json the JsonObject of the API response
|
||||
* @param lastTopLevelComment the last top level comment from the previous page or {@code null}
|
||||
* if this method is run for the initial page.
|
||||
* @throws ParsingException
|
||||
*/
|
||||
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
|
||||
@Nonnull final JsonObject json,
|
||||
@Nullable final JsonObject lastTopLevelComment)
|
||||
throws ParsingException {
|
||||
final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>();
|
||||
final String url = getUrl();
|
||||
final JsonArray entries = json.getArray(COLLECTION);
|
||||
JsonObject lastTopComment = null;
|
||||
for (int i = 0; i < entries.size(); i++) {
|
||||
final JsonObject entry = entries.getObject(i);
|
||||
if (i == 0
|
||||
|| (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
|
||||
&& !SoundcloudParsingHelper.isReplyTo(lastTopComment, entry))) {
|
||||
lastTopComment = entry;
|
||||
collector.commit(new SoundcloudCommentsInfoItemExtractor(
|
||||
json, i, entry, url));
|
||||
/**
|
||||
* The current top level comment.
|
||||
*/
|
||||
JsonObject currentTopLevelComment = null;
|
||||
boolean isLastCommentReply = true;
|
||||
// Check whether the first comment in the list is a reply to the last top level comment
|
||||
// from the previous page if there was a previous page.
|
||||
if (lastTopLevelComment != null) {
|
||||
final JsonObject firstComment = entries.getObject(0);
|
||||
if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment, firstComment)) {
|
||||
currentTopLevelComment = lastTopLevelComment;
|
||||
} else {
|
||||
extractors.add(new SoundcloudCommentsInfoItemExtractor(
|
||||
json, SoundcloudCommentsInfoItemExtractor.PREVIOUS_PAGE_INDEX,
|
||||
firstComment, url, null));
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < entries.size(); i++) {
|
||||
final JsonObject entry = entries.getObject(i);
|
||||
// extract all top level comments
|
||||
// The first comment is a top level comment
|
||||
// if it is not a reply to the last top level comment
|
||||
//
|
||||
if (i == 0 && currentTopLevelComment == null
|
||||
|| (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
|
||||
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) {
|
||||
currentTopLevelComment = entry;
|
||||
if (i == entries.size() - 1) {
|
||||
isLastCommentReply = false;
|
||||
this.lastTopLevelComment = currentTopLevelComment;
|
||||
// Do not collect the last comment if it is a top level comment
|
||||
// because it might have replies.
|
||||
// That is information we cannot get from the comment itself
|
||||
// (thanks SoundCloud...) but needs to be obtained from the next comment.
|
||||
// The comment will therefore be collected
|
||||
// when collecting the items from the next page.
|
||||
break;
|
||||
}
|
||||
extractors.add(new SoundcloudCommentsInfoItemExtractor(
|
||||
json, i, entry, url, lastTopLevelComment));
|
||||
}
|
||||
}
|
||||
if (isLastCommentReply) {
|
||||
// Do not collect the last top level comment if it has replies and the retrieved
|
||||
// comment list ends with a reply. We do not know whether the next page starts
|
||||
// with more replies to the last top level comment.
|
||||
this.lastTopLevelComment = extractors.remove(extractors.size() - 1).item;
|
||||
}
|
||||
extractors.stream().forEach(collector::commit);
|
||||
|
||||
}
|
||||
|
||||
private boolean collectRepliesFrom(final CommentsInfoItemsCollector collector,
|
||||
final JsonObject json,
|
||||
final int id,
|
||||
final String url) {
|
||||
/**
|
||||
* Collect replies to a top level comment from a SoundCloud API response.
|
||||
* @param collector the collector which collects the replies
|
||||
* @param json the SoundCloud API response
|
||||
* @param id the comment's id for which the replies are collected
|
||||
* @param url the corresponding page's URL
|
||||
* @return
|
||||
*/
|
||||
private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
|
||||
@Nonnull final JsonObject json,
|
||||
final int id,
|
||||
@Nonnull final String url) {
|
||||
JsonObject originalComment = null;
|
||||
final JsonArray entries = json.getArray(COLLECTION);
|
||||
boolean moreReplies = false;
|
||||
|
|
|
@ -6,10 +6,8 @@ import com.grack.nanojson.JsonArray;
|
|||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import org.schabi.newpipe.extractor.Page;
|
||||
import org.schabi.newpipe.extractor.ServiceList;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
|
||||
|
@ -17,32 +15,42 @@ import org.schabi.newpipe.extractor.stream.Description;
|
|||
|
||||
import java.util.Objects;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||
public static final int PREVIOUS_PAGE_INDEX = -1;
|
||||
public static final String BODY = "body";
|
||||
public static final String USER_PERMALINK = "permalink";
|
||||
public static final String USER_FULL_NAME = "full_name";
|
||||
public static final String USER_USERNAME = "username";
|
||||
|
||||
private final JsonObject json;
|
||||
@Nonnull private final JsonObject json;
|
||||
private final int index;
|
||||
private final JsonObject item;
|
||||
@Nonnull public final JsonObject item;
|
||||
private final String url;
|
||||
private final JsonObject user;
|
||||
private final JsonObject superComment;
|
||||
@Nonnull private final JsonObject user;
|
||||
/**
|
||||
* A comment to which this comment is a reply.
|
||||
* Is {@code null} if this comment is itself a top level comment.
|
||||
*/
|
||||
@Nullable private final JsonObject topLevelComment;
|
||||
|
||||
/**
|
||||
* The reply count is not given by the SoundCloud API, but needs to be obtained
|
||||
* by counting the comments which come directly after this item and have the same timestamp.
|
||||
*/
|
||||
private int replyCount = CommentsInfoItem.UNKNOWN_REPLY_COUNT;
|
||||
private Page repliesPage = null;
|
||||
|
||||
public SoundcloudCommentsInfoItemExtractor(final JsonObject json, final int index,
|
||||
final JsonObject item, final String url,
|
||||
@Nullable final JsonObject superComment) {
|
||||
public SoundcloudCommentsInfoItemExtractor(@Nonnull final JsonObject json, final int index,
|
||||
@Nonnull final JsonObject item, final String url,
|
||||
@Nullable final JsonObject topLevelComment) {
|
||||
this.json = json;
|
||||
this.index = index;
|
||||
this.item = item;
|
||||
this.url = url;
|
||||
this.superComment = superComment;
|
||||
this.topLevelComment = topLevelComment;
|
||||
this.user = item.getObject("user");
|
||||
}
|
||||
|
||||
|
@ -58,7 +66,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
|
|||
@Override
|
||||
public Description getCommentText() {
|
||||
String commentContent = item.getString(BODY);
|
||||
if (superComment == null) {
|
||||
if (topLevelComment == null) {
|
||||
return new Description(commentContent, Description.PLAIN_TEXT);
|
||||
}
|
||||
// This comment is a reply to another comment.
|
||||
|
@ -78,7 +86,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
|
|||
}
|
||||
}
|
||||
if (author == null) {
|
||||
author = superComment.getObject("user");
|
||||
author = topLevelComment.getObject("user");
|
||||
}
|
||||
final String name = isNullOrEmpty(author.getString(USER_FULL_NAME))
|
||||
? author.getString(USER_USERNAME) : author.getString(USER_FULL_NAME);
|
||||
|
@ -149,24 +157,17 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
|
|||
@Override
|
||||
public Page getReplies() {
|
||||
if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) {
|
||||
final JsonArray replies = new JsonArray();
|
||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
||||
ServiceList.SoundCloud.getServiceId());
|
||||
replyCount = 0;
|
||||
// SoundCloud has only comments and top level replies, but not nested replies.
|
||||
// Therefore, replies cannot have further replies.
|
||||
if (superComment == null) {
|
||||
if (topLevelComment == null) {
|
||||
// Loop through all comments which come after the original comment
|
||||
// to find its replies.
|
||||
final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION);
|
||||
boolean foundReply = false;
|
||||
for (int i = index + 1; i < allItems.size(); i++) {
|
||||
final JsonObject comment = allItems.getObject(i);
|
||||
if (SoundcloudParsingHelper.isReplyTo(item, comment)) {
|
||||
replies.add(comment);
|
||||
collector.commit(new SoundcloudCommentsInfoItemExtractor(
|
||||
json, i, comment, url, item));
|
||||
foundReply = true;
|
||||
} else if (foundReply) {
|
||||
if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) {
|
||||
replyCount++;
|
||||
} else {
|
||||
// Only the comments directly after the original comment
|
||||
// having the same timestamp are replies to the original comment.
|
||||
// The first comment not having the same timestamp
|
||||
|
@ -175,8 +176,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
|
|||
}
|
||||
}
|
||||
}
|
||||
replyCount = replies.size();
|
||||
if (collector.getItems().isEmpty()) {
|
||||
if (replyCount == 0) {
|
||||
return null;
|
||||
}
|
||||
repliesPage = new Page(getUrl(), getCommentId());
|
||||
|
|
Ładowanie…
Reference in New Issue