restructured search engine

pull/224/head
Christian Schabesberger 2016-02-29 15:59:06 +01:00
rodzic 737a41f45b
commit d7e75e6011
7 zmienionych plików z 261 dodań i 132 usunięć

Wyświetl plik

@ -17,6 +17,7 @@ import java.io.IOException;
import java.util.List;
import org.schabi.newpipe.extractor.ExtractionException;
import org.schabi.newpipe.extractor.SearchResult;
import org.schabi.newpipe.extractor.StreamPreviewInfo;
import org.schabi.newpipe.extractor.SearchEngine;
import org.schabi.newpipe.extractor.StreamingService;
@ -67,9 +68,9 @@ public class VideoItemListFragment extends ListFragment {
private boolean loadingNextPage = true;
private class ResultRunnable implements Runnable {
private final SearchEngine.Result result;
private final SearchResult result;
private final int requestId;
public ResultRunnable(SearchEngine.Result result, int requestId) {
public ResultRunnable(SearchResult result, int requestId) {
this.result = result;
this.requestId = requestId;
}
@ -105,8 +106,8 @@ public class VideoItemListFragment extends ListFragment {
String searchLanguageKey = getContext().getString(R.string.search_language_key);
String searchLanguage = sp.getString(searchLanguageKey,
getString(R.string.default_language_value));
SearchEngine.Result result = engine.search(query, page, searchLanguage,
new Downloader());
SearchResult result = SearchResult
.getSearchResult(engine, query, page, searchLanguage, new Downloader());
//Log.i(TAG, "language code passed:\""+searchLanguage+"\"");
if(runs) {
@ -165,12 +166,10 @@ public class VideoItemListFragment extends ListFragment {
this.streamingService = streamingService;
}
private void updateListOnResult(SearchEngine.Result result, int requestId) {
private void updateListOnResult(SearchResult result, int requestId) {
if(requestId == currentRequestId) {
setListShown(true);
if (result.resultList.isEmpty()) {
Toast.makeText(getActivity(), result.errorMessage, Toast.LENGTH_LONG).show();
} else {
if (!result.resultList.isEmpty()) {
if (!result.suggestion.isEmpty()) {
Toast.makeText(getActivity(), getString(R.string.did_you_mean) + result.suggestion + " ?",
Toast.LENGTH_LONG).show();

Wyświetl plik

@ -27,16 +27,10 @@ import java.util.Vector;
@SuppressWarnings("ALL")
public interface SearchEngine {
class Result {
public String errorMessage = "";
public String suggestion = "";
public final List<StreamPreviewInfo> resultList = new Vector<>();
}
ArrayList<String> suggestionList(String query,String contentCountry, Downloader dl)
throws ExtractionException, IOException;
//Result search(String query, int page);
Result search(String query, int page, String contentCountry, Downloader dl)
StreamPreviewInfoCollector search(String query, int page, String contentCountry, Downloader dl)
throws ExtractionException, IOException;
}

Wyświetl plik

@ -0,0 +1,41 @@
package org.schabi.newpipe.extractor;
import java.io.IOException;
import java.util.List;
import java.util.Vector;
/**
* Created by Christian Schabesberger on 29.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* SearchResult.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Outcome of a single search request: the collected stream previews, an
 * optional query suggestion, and the exceptions recorded while collecting.
 */
public class SearchResult {

    /**
     * Runs a search on the given engine and returns the result it collected.
     *
     * @param engine       the engine whose {@code search} method is invoked
     * @param query        the search query, handed to the engine unchanged
     * @param page         the result page, handed to the engine unchanged
     * @param languageCode handed to the engine as its third argument
     * @param dl           the downloader the engine should use
     * @return the {@link SearchResult} held by the collector the engine returned
     * @throws IOException         if the engine failed with an I/O error; it is passed
     *                             through unchanged so callers can tell network problems
     *                             apart from extraction problems
     * @throws ExtractionException for every other failure; the original exception is
     *                             attached as the cause
     */
    public static SearchResult getSearchResult(SearchEngine engine, String query,
                                               int page, String languageCode, Downloader dl)
            throws ExtractionException, IOException {
        try {
            return engine.search(query, page, languageCode, dl).getSearchResult();
        } catch (IOException e) {
            // Declared in the signature: do not disguise I/O failures as extraction errors.
            throw e;
        } catch (Exception e) {
            throw new ExtractionException("Could not get any search result", e);
        }
    }

    /** Alternative query proposed for this search; empty when none was set. */
    public String suggestion = "";

    /** Stream previews collected for this search, in the order they were added. */
    public final List<StreamPreviewInfo> resultList = new Vector<>();

    /** Exceptions recorded while the result was being collected. */
    public List<Exception> errors = new Vector<>();
}

Wyświetl plik

@ -1,9 +1,5 @@
package org.schabi.newpipe.extractor;
import android.graphics.Bitmap;
import android.os.Parcel;
import android.os.Parcelable;
/**
* Created by Christian Schabesberger on 26.08.15.
*

Wyświetl plik

@ -0,0 +1,91 @@
package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeStreamUrlIdHandler;
/**
* Created by Christian Schabesberger on 28.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* StreamPreviewInfoCollector.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Builds up a {@link SearchResult} item by item. Each item is read through a
 * {@link StreamPreviewInfoExtractor}; problems encountered while reading are
 * recorded in the result's error list instead of being thrown.
 */
public class StreamPreviewInfoCollector {
    SearchResult result = new SearchResult();
    StreamUrlIdHandler urlIdHandler = null;

    /**
     * @param handler used to derive an item's id from its web page URL
     */
    public StreamPreviewInfoCollector(StreamUrlIdHandler handler) {
        urlIdHandler = handler;
    }

    /** Stores the given suggestion in the result being built. */
    public void setSuggestion(String suggestion) {
        result.suggestion = suggestion;
    }

    /** Appends the given exception to the result's error list. */
    public void addError(Exception e) {
        result.errors.add(e);
    }

    /** Returns the result built so far (the same instance on every call). */
    public SearchResult getSearchResult() {
        return result;
    }

    /**
     * Reads one item from the extractor and appends it to the result list.
     *
     * <p>URL, id and title are mandatory: if any of them cannot be obtained, the
     * exception is recorded via {@link #addError(Exception)} and the item is not
     * added. Duration, uploader, upload date, view count and thumbnail URL are
     * optional: a failure is recorded and the item is still added with that
     * field left at its default.
     *
     * @param extractor source of the item's fields
     * @throws ParsingException declared for callers; the current body records every
     *                          exception it encounters rather than propagating it
     */
    public void commit(StreamPreviewInfoExtractor extractor) throws ParsingException {
        try {
            StreamPreviewInfo resultItem = new StreamPreviewInfo();

            // important information
            resultItem.webpage_url = extractor.getWebPageUrl();
            if (urlIdHandler == null) {
                throw new ParsingException("Error: UrlIdHandler not set");
            }
            // Use the handler supplied by the service rather than a hard-coded one,
            // so that the collector works for whichever service created it.
            resultItem.id = urlIdHandler.getVideoId(resultItem.webpage_url);
            resultItem.title = extractor.getTitle();

            // optional information
            try {
                resultItem.duration = extractor.getDuration();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.uploader = extractor.getUploader();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.upload_date = extractor.getUploadDate();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.view_count = extractor.getViewCount();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.thumbnail_url = extractor.getThumbnailUrl();
            } catch (Exception e) {
                addError(e);
            }

            result.resultList.add(resultItem);
        } catch (Exception e) {
            // A failure in a mandatory field drops this item but keeps the search going.
            addError(e);
        }
    }
}

Wyświetl plik

@ -0,0 +1,31 @@
package org.schabi.newpipe.extractor;
/**
* Created by Christian Schabesberger on 28.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* StreamPreviewInfoExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Read access to the fields of a single stream preview item.
 * StreamPreviewInfoCollector.commit() calls these getters to fill a
 * StreamPreviewInfo; every getter may throw ParsingException when its value
 * cannot be obtained.
 */
public interface StreamPreviewInfoExtractor {
/** URL of the item's web page. The collector treats it as mandatory and derives the item id from it. */
String getWebPageUrl() throws ParsingException;
/** Title of the item. The collector treats it as mandatory. */
String getTitle() throws ParsingException;
/** Duration of the item as text; the exact format is up to the implementation. Optional for the collector. */
String getDuration() throws ParsingException;
/** Name of the uploader. Optional for the collector. */
String getUploader() throws ParsingException;
/** Upload date as text; the exact format is up to the implementation. Optional for the collector. */
String getUploadDate() throws ParsingException;
/** Number of views. Optional for the collector. */
long getViewCount() throws ParsingException;
/** URL of the item's thumbnail image. Optional for the collector. */
String getThumbnailUrl() throws ParsingException;
}

Wyświetl plik

@ -10,7 +10,9 @@ import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.ParsingException;
import org.schabi.newpipe.extractor.SearchEngine;
import org.schabi.newpipe.extractor.StreamPreviewInfo;
import org.schabi.newpipe.extractor.StreamExtractor;
import org.schabi.newpipe.extractor.StreamPreviewInfoCollector;
import org.schabi.newpipe.extractor.StreamPreviewInfoExtractor;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
@ -49,9 +51,10 @@ public class YoutubeSearchEngine implements SearchEngine {
private static final String TAG = YoutubeSearchEngine.class.toString();
@Override
public Result search(String query, int page, String languageCode, Downloader downloader)
public StreamPreviewInfoCollector search(String query, int page, String languageCode, Downloader downloader)
throws IOException, ParsingException {
Result result = new Result();
StreamPreviewInfoCollector collector = new StreamPreviewInfoCollector(
new YoutubeStreamUrlIdHandler());
Uri.Builder builder = new Uri.Builder();
builder.scheme("https")
.authority("www.youtube.com")
@ -71,12 +74,11 @@ public class YoutubeSearchEngine implements SearchEngine {
site = downloader.download(url);
}
try {
Document doc = Jsoup.parse(site, url);
Element list = doc.select("ol[class=\"item-section\"]").first();
Document doc = Jsoup.parse(site, url);
Element list = doc.select("ol[class=\"item-section\"]").first();
for (Element item : list.children()) {
for (Element item : list.children()) {
/* First we need to determine which kind of item we are working with.
Youtube depicts five different kinds of items on its search result page. These are
regular videos, playlists, channels, two types of video suggestions, and a "no video
@ -88,62 +90,26 @@ public class YoutubeSearchEngine implements SearchEngine {
playlists now.
*/
Element el;
Element el;
// both types of spell correction item
if (!((el = item.select("div[class*=\"spell-correction\"]").first()) == null)) {
result.suggestion = el.select("a").first().text();
// search message item
} else if (!((el = item.select("div[class*=\"search-message\"]").first()) == null)) {
result.errorMessage = el.text();
// both types of spell correction item
if (!((el = item.select("div[class*=\"spell-correction\"]").first()) == null)) {
collector.setSuggestion(el.select("a").first().text());
// search message item
} else if (!((el = item.select("div[class*=\"search-message\"]").first()) == null)) {
//result.errorMessage = el.text();
throw new StreamExtractor.ContentNotAvailableException(el.text());
// video item type
} else if (!((el = item.select("div[class*=\"yt-lockup-video\"").first()) == null)) {
StreamPreviewInfo resultItem = new StreamPreviewInfo();
// importand information
resultItem.webpage_url = getWebpageUrl(item);
resultItem.id = (new YoutubeStreamUrlIdHandler()).getVideoId(resultItem.webpage_url);
resultItem.title = getTitle(item);
// optional iformation
//todo: make this a proper error handling
try {
resultItem.duration = getDuration(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.uploader = getUploader(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.upload_date = getUploadDate(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.view_count = getViewCount(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.thumbnail_url = getThumbnailUrl(item);
} catch (Exception e) {
e.printStackTrace();
}
result.resultList.add(resultItem);
} else {
//noinspection ConstantConditions
Log.e(TAG, "unexpected element found:\"" + el + "\"");
}
// video item type
} else if (!((el = item.select("div[class*=\"yt-lockup-video\"").first()) == null)) {
collector.commit(extractPreviewInfo(el));
} else {
//noinspection ConstantConditions
collector.addError(new Exception("unexpected element found:\"" + el + "\""));
}
} catch(Exception e) {
throw new ParsingException(e);
}
return result;
return collector;
}
@Override
@ -203,67 +169,78 @@ public class YoutubeSearchEngine implements SearchEngine {
}
}
private String getWebpageUrl(Element item) {
Element el = item.select("div[class*=\"yt-lockup-video\"").first();
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
}
private StreamPreviewInfoExtractor extractPreviewInfo(final Element item) {
return new StreamPreviewInfoExtractor() {
@Override
public String getWebPageUrl() throws ParsingException {
Element el = item.select("div[class*=\"yt-lockup-video\"").first();
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
}
private String getTitle(Element item) {
Element el = item.select("div[class*=\"yt-lockup-video\"").first();
Element dl = el.select("h3").first().select("a").first();
return dl.text();
}
@Override
public String getTitle() throws ParsingException {
Element el = item.select("div[class*=\"yt-lockup-video\"").first();
Element dl = el.select("h3").first().select("a").first();
return dl.text();
}
private String getDuration(Element item) {
try {
return item.select("span[class=\"video-time\"]").first().text();
} catch(Exception e) {
e.printStackTrace();
}
return "";
}
@Override
public String getDuration() throws ParsingException {
try {
return item.select("span[class=\"video-time\"]").first().text();
} catch(Exception e) {
e.printStackTrace();
}
return "";
}
private String getUploader(Element item) {
return item.select("div[class=\"yt-lockup-byline\"]").first()
.select("a").first()
.text();
}
@Override
public String getUploader() throws ParsingException {
return item.select("div[class=\"yt-lockup-byline\"]").first()
.select("a").first()
.text();
}
private String getUploadDate(Element item) {
return item.select("div[class=\"yt-lockup-meta\"]").first()
.select("li").first()
.text();
}
@Override
public String getUploadDate() throws ParsingException {
return item.select("div[class=\"yt-lockup-meta\"]").first()
.select("li").first()
.text();
}
private long getViewCount(Element item) throws Parser.RegexException{
String output;
String input = item.select("div[class=\"yt-lockup-meta\"]").first()
.select("li").get(1)
.text();
output = Parser.matchGroup1("([0-9,\\. ]*)", input)
.replace(" ", "")
.replace(".", "")
.replace(",", "");
@Override
public long getViewCount() throws ParsingException {
String output;
String input = item.select("div[class=\"yt-lockup-meta\"]").first()
.select("li").get(1)
.text();
output = Parser.matchGroup1("([0-9,\\. ]*)", input)
.replace(" ", "")
.replace(".", "")
.replace(",", "");
if(Long.parseLong(output) == 30) {
Log.d(TAG, "bla");
}
return Long.parseLong(output);
}
if(Long.parseLong(output) == 30) {
Log.d(TAG, "bla");
}
return Long.parseLong(output);
}
private String getThumbnailUrl(Element item) {
String url;
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
.select("img").first();
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
@Override
public String getThumbnailUrl() throws ParsingException {
String url;
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
.select("img").first();
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
return url;
}
};
}
}