kopia lustrzana https://github.com/cblgh/lieu
Added experimental support for "-site:" and "lang:" queries
rodzic
b4a2e5e269
commit
b431a15441
|
@ -19,10 +19,13 @@ import (
|
|||
"log"
|
||||
"net/url"
|
||||
"strings"
|
||||
"regexp"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
var languageCodeSanityRegex = regexp.MustCompile("^[a-zA-Z\\-0-9]+$")
|
||||
|
||||
func InitDB(filepath string) *sql.DB {
|
||||
db, err := sql.Open("sqlite3", filepath)
|
||||
if err != nil {
|
||||
|
@ -95,17 +98,19 @@ query params:
|
|||
&order=score, &order=count
|
||||
*/
|
||||
|
||||
var emptyStringArray = []string{}
|
||||
|
||||
func SearchWordsByScore(db *sql.DB, words []string) []types.PageData {
|
||||
return searchWords(db, words, true)
|
||||
return SearchWords(db, words, true, emptyStringArray, emptyStringArray, emptyStringArray)
|
||||
}
|
||||
|
||||
func SearchWordsBySite(db *sql.DB, words []string, domain string) []types.PageData {
|
||||
// searching words by site is the same as searching words by score, but adds a domain condition
|
||||
return searchWords(db, words, true, domain)
|
||||
return SearchWords(db, words, true, []string{domain}, emptyStringArray, emptyStringArray)
|
||||
}
|
||||
|
||||
func SearchWordsByCount(db *sql.DB, words []string) []types.PageData {
|
||||
return searchWords(db, words, false)
|
||||
return SearchWords(db, words, false, emptyStringArray, emptyStringArray, emptyStringArray)
|
||||
}
|
||||
|
||||
func FulltextSearchWords(db *sql.DB, phrase string) []types.PageData {
|
||||
|
@ -222,12 +227,16 @@ func countQuery(db *sql.DB, table string) int {
|
|||
return count
|
||||
}
|
||||
|
||||
func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...string) []types.PageData {
|
||||
var wordlist []string
|
||||
func SearchWords(db *sql.DB, words []string, searchByScore bool, domain []string, nodomain []string, language []string) []types.PageData {
|
||||
var args []interface{}
|
||||
for _, word := range words {
|
||||
wordlist = append(wordlist, "word = ?")
|
||||
args = append(args, strings.ToLower(word))
|
||||
|
||||
wordlist := []string{"1"}
|
||||
if len(words) > 0 && words[0] != "" {
|
||||
wordlist = make([]string, 0)
|
||||
for _, word := range words {
|
||||
wordlist = append(wordlist, "word = ?")
|
||||
args = append(args, strings.ToLower(word))
|
||||
}
|
||||
}
|
||||
|
||||
// the domains conditional defaults to just 'true' i.e. no domain condition
|
||||
|
@ -240,6 +249,28 @@ func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...strin
|
|||
}
|
||||
}
|
||||
|
||||
nodomains := []string{"1"}
|
||||
if len(nodomain) > 0 && nodomain[0] != "" {
|
||||
nodomains = make([]string, 0)
|
||||
for _, d := range nodomain {
|
||||
nodomains = append(nodomains, "domain != ?")
|
||||
args = append(args, d)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: language matching is currently prefix-only (lang LIKE "<code>%"); this needs some wildcard support …
|
||||
languages := []string{"1"}
|
||||
if len(language) > 0 && language[0] != "" {
|
||||
languages = make([]string, 0)
|
||||
for _, d := range language {
|
||||
// Sanity-check the language code (letters, digits and '-' only) to avoid the database being DoSed
|
||||
if languageCodeSanityRegex.MatchString(d) {
|
||||
languages = append(languages, "lang LIKE ?")
|
||||
args = append(args, d+"%")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
orderType := "SUM(score)"
|
||||
if !searchByScore {
|
||||
orderType = "COUNT(*)"
|
||||
|
@ -250,11 +281,16 @@ func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...strin
|
|||
FROM inv_index inv INNER JOIN pages p ON inv.url = p.url
|
||||
WHERE (%s)
|
||||
AND (%s)
|
||||
AND (%s)
|
||||
AND (%s)
|
||||
GROUP BY inv.url
|
||||
ORDER BY %s
|
||||
DESC
|
||||
LIMIT 15
|
||||
`, strings.Join(wordlist, " OR "), strings.Join(domains, " OR "), orderType)
|
||||
`, strings.Join(wordlist, " OR "), strings.Join(domains, " OR "), strings.Join(nodomains, " AND "), strings.Join(languages, " OR "), orderType)
|
||||
|
||||
fmt.Println(words)
|
||||
fmt.Println(query)
|
||||
|
||||
stmt, err := db.Prepare(query)
|
||||
util.Check(err)
|
||||
|
|
|
@ -7,7 +7,6 @@ import (
|
|||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
|
@ -61,17 +60,21 @@ var templates = template.Must(template.ParseFiles(
|
|||
|
||||
const useURLTitles = true
|
||||
|
||||
var sitePattern = regexp.MustCompile(`site:\S+`)
|
||||
|
||||
func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request) {
|
||||
var query string
|
||||
var domain string
|
||||
view := &TemplateView{}
|
||||
|
||||
var domain string
|
||||
var domains = []string{}
|
||||
var nodomains = []string{}
|
||||
var langs = []string{}
|
||||
var queryFields = []string{}
|
||||
|
||||
if req.Method == http.MethodGet {
|
||||
params := req.URL.Query()
|
||||
if words, exists := params["q"]; exists && words[0] != "" {
|
||||
query = words[0]
|
||||
queryFields = strings.Fields(query)
|
||||
}
|
||||
|
||||
// how to use: https://gist.github.com/cblgh/29991ba0a9e65cccbe14f4afd7c975f1
|
||||
|
@ -80,29 +83,34 @@ func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request)
|
|||
domain = strings.TrimPrefix(parts[0], "https://")
|
||||
domain = strings.TrimPrefix(domain, "http://")
|
||||
domain = strings.TrimSuffix(domain, "/")
|
||||
} else if sitePattern.MatchString(query) {
|
||||
// if user searched with "site:<domain>" in text box, behave the same way as if a query param was used
|
||||
domain = sitePattern.FindString(query)[5:]
|
||||
domains = append(domains, domain)
|
||||
}
|
||||
// if clear button was used -> clear site param / search text
|
||||
if parts, exists := params["clear"]; exists && parts[0] != "" {
|
||||
domain = ""
|
||||
query = sitePattern.ReplaceAllString(query, "")
|
||||
|
||||
var newQueryFields []string;
|
||||
fmt.Println("Query Fields:", queryFields)
|
||||
for _, word := range queryFields {
|
||||
// This could be more efficient by splitting arrays, but I'm going with the more readable version for now
|
||||
if strings.HasPrefix(word, "site:") {
|
||||
domains = append(domains, strings.TrimPrefix(word, "site:"))
|
||||
} else if strings.HasPrefix(word, "-site:") {
|
||||
nodomains = append(nodomains, strings.TrimPrefix(word, "-site:"))
|
||||
} else if strings.HasPrefix(word, "lang:") {
|
||||
langs = append(langs, strings.TrimPrefix(word, "lang:"))
|
||||
} else {
|
||||
newQueryFields = append(newQueryFields, word)
|
||||
}
|
||||
}
|
||||
queryFields = newQueryFields;
|
||||
|
||||
}
|
||||
|
||||
if len(query) == 0 {
|
||||
if len(queryFields) == 0 {
|
||||
view.Data = IndexData{Tagline: h.config.General.Tagline, Placeholder: h.config.General.Placeholder}
|
||||
h.renderView(res, "index", view)
|
||||
return
|
||||
}
|
||||
|
||||
var pages []types.PageData
|
||||
if domain != "" {
|
||||
pages = database.SearchWordsBySite(h.db, util.Inflect(strings.Fields(query)), domain)
|
||||
} else {
|
||||
pages = database.SearchWordsByScore(h.db, util.Inflect(strings.Fields(query)))
|
||||
}
|
||||
var pages = database.SearchWords(h.db, util.Inflect(queryFields), true, domains, nodomains, langs)
|
||||
|
||||
if useURLTitles {
|
||||
for i, pageData := range pages {
|
||||
|
|
Ładowanie…
Reference in New Issue