Added experimental support for "-site:" and "lang:" queries

pull/20/head^2
Slatian 2022-11-24 15:38:24 +01:00 zatwierdzone przez Alexander Cobleigh
rodzic b4a2e5e269
commit b431a15441
2 zmienionych plików z 71 dodań i 27 usunięć

Wyświetl plik

@ -19,10 +19,13 @@ import (
"log"
"net/url"
"strings"
"regexp"
_ "github.com/mattn/go-sqlite3"
)
// languageCodeSanityRegex matches a non-empty string consisting only of ASCII
// letters, digits and hyphens. SearchWords uses it to discard language filter
// values that fail the check before they are bound as a LIKE prefix pattern.
var languageCodeSanityRegex = regexp.MustCompile("^[a-zA-Z\\-0-9]+$")
func InitDB(filepath string) *sql.DB {
db, err := sql.Open("sqlite3", filepath)
if err != nil {
@ -95,17 +98,19 @@ query params:
&order=score, &order=count
*/
// emptyStringArray is a shared empty slice that the SearchWordsBy* wrappers
// pass to SearchWords for the filter arguments they do not use.
var emptyStringArray = []string{}
// SearchWordsByScore searches for the given words with searchByScore set to
// true (ordering by summed score) and without any domain, excluded-domain or
// language filter.
func SearchWordsByScore(db *sql.DB, words []string) []types.PageData {
// NOTE(review): this page is a diff rendered without +/- markers; the first
// return looks like the removed pre-commit call and the second like its
// replacement — confirm against the repository.
return searchWords(db, words, true)
return SearchWords(db, words, true, emptyStringArray, emptyStringArray, emptyStringArray)
}
// SearchWordsBySite searches for the given words with searchByScore set to
// true, restricted to the single given domain; no excluded-domain or language
// filter is applied.
func SearchWordsBySite(db *sql.DB, words []string, domain string) []types.PageData {
// search words by site is same as search words by score, but adds a domain condition
// NOTE(review): this page is a diff rendered without +/- markers; the first
// return looks like the removed pre-commit call and the second like its
// replacement — confirm against the repository.
return searchWords(db, words, true, domain)
return SearchWords(db, words, true, []string{domain}, emptyStringArray, emptyStringArray)
}
// SearchWordsByCount searches for the given words with searchByScore set to
// false (ordering by COUNT(*) instead of summed score) and without any domain,
// excluded-domain or language filter.
func SearchWordsByCount(db *sql.DB, words []string) []types.PageData {
// NOTE(review): this page is a diff rendered without +/- markers; the first
// return looks like the removed pre-commit call and the second like its
// replacement — confirm against the repository.
return searchWords(db, words, false)
return SearchWords(db, words, false, emptyStringArray, emptyStringArray, emptyStringArray)
}
func FulltextSearchWords(db *sql.DB, phrase string) []types.PageData {
@ -222,12 +227,16 @@ func countQuery(db *sql.DB, table string) int {
return count
}
func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...string) []types.PageData {
var wordlist []string
func SearchWords(db *sql.DB, words []string, searchByScore bool, domain []string, nodomain []string, language []string) []types.PageData {
var args []interface{}
for _, word := range words {
wordlist = append(wordlist, "word = ?")
args = append(args, strings.ToLower(word))
wordlist := []string{"1"}
if len(words) > 0 && words[0] != "" {
wordlist = make([]string, 0)
for _, word := range words {
wordlist = append(wordlist, "word = ?")
args = append(args, strings.ToLower(word))
}
}
// the domains conditional defaults to just 'true' i.e. no domain condition
@ -240,6 +249,28 @@ func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...strin
}
}
nodomains := []string{"1"}
if len(nodomain) > 0 && nodomain[0] != "" {
nodomains = make([]string, 0)
for _, d := range nodomain {
nodomains = append(nodomains, "domain != ?")
args = append(args, d)
}
}
//This needs some wildcard support …
languages := []string{"1"}
if len(language) > 0 && language[0] != "" {
languages = make([]string, 0)
for _, d := range language {
// Do a little check to avoid the database being DOSed
if languageCodeSanityRegex.MatchString(d) {
languages = append(languages, "lang LIKE ?")
args = append(args, d+"%")
}
}
}
orderType := "SUM(score)"
if !searchByScore {
orderType = "COUNT(*)"
@ -250,11 +281,16 @@ func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...strin
FROM inv_index inv INNER JOIN pages p ON inv.url = p.url
WHERE (%s)
AND (%s)
AND (%s)
AND (%s)
GROUP BY inv.url
ORDER BY %s
DESC
LIMIT 15
`, strings.Join(wordlist, " OR "), strings.Join(domains, " OR "), orderType)
`, strings.Join(wordlist, " OR "), strings.Join(domains, " OR "), strings.Join(nodomains, " AND "), strings.Join(languages, " OR "), orderType)
fmt.Println(words)
fmt.Println(query)
stmt, err := db.Prepare(query)
util.Check(err)

Wyświetl plik

@ -7,7 +7,6 @@ import (
"net/http"
"net/url"
"os"
"regexp"
"strings"
"syscall"
@ -61,17 +60,21 @@ var templates = template.Must(template.ParseFiles(
// useURLTitles gates a per-page loop over the search results in searchRoute.
// NOTE(review): the loop body is not visible in this view — confirm what it
// rewrites before relying on the name.
const useURLTitles = true
// sitePattern matches a "site:" token followed by one or more non-whitespace
// characters inside the raw query text.
// NOTE(review): the hunk above drops the "regexp" import context and the new
// code parses "site:" with strings.HasPrefix, so this declaration may be a
// removed line of the diff — confirm against the repository.
var sitePattern = regexp.MustCompile(`site:\S+`)
func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request) {
var query string
var domain string
view := &TemplateView{}
var domain string
var domains = []string{}
var nodomains = []string{}
var langs = []string{}
var queryFields = []string{}
if req.Method == http.MethodGet {
params := req.URL.Query()
if words, exists := params["q"]; exists && words[0] != "" {
query = words[0]
queryFields = strings.Fields(query)
}
// how to use: https://gist.github.com/cblgh/29991ba0a9e65cccbe14f4afd7c975f1
@ -80,29 +83,34 @@ func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request)
domain = strings.TrimPrefix(parts[0], "https://")
domain = strings.TrimPrefix(domain, "http://")
domain = strings.TrimSuffix(domain, "/")
} else if sitePattern.MatchString(query) {
// if user searched with "site:<domain>" in text box, behave the same way as if a query param was used
domain = sitePattern.FindString(query)[5:]
domains = append(domains, domain)
}
// if clear button was used -> clear site param / search text
if parts, exists := params["clear"]; exists && parts[0] != "" {
domain = ""
query = sitePattern.ReplaceAllString(query, "")
var newQueryFields []string;
fmt.Println("Query Fields:", queryFields)
for _, word := range queryFields {
// This could be more efficient by splitting arrays, but I'm going with the more readable version for now
if strings.HasPrefix(word, "site:") {
domains = append(domains, strings.TrimPrefix(word, "site:"))
} else if strings.HasPrefix(word, "-site:") {
nodomains = append(nodomains, strings.TrimPrefix(word, "-site:"))
} else if strings.HasPrefix(word, "lang:") {
langs = append(langs, strings.TrimPrefix(word, "lang:"))
} else {
newQueryFields = append(newQueryFields, word)
}
}
queryFields = newQueryFields;
}
if len(query) == 0 {
if len(queryFields) == 0 {
view.Data = IndexData{Tagline: h.config.General.Tagline, Placeholder: h.config.General.Placeholder}
h.renderView(res, "index", view)
return
}
var pages []types.PageData
if domain != "" {
pages = database.SearchWordsBySite(h.db, util.Inflect(strings.Fields(query)), domain)
} else {
pages = database.SearchWordsByScore(h.db, util.Inflect(strings.Fields(query)))
}
var pages = database.SearchWords(h.db, util.Inflect(queryFields), true, domains, nodomains, langs)
if useURLTitles {
for i, pageData := range pages {