diff --git a/database/database.go b/database/database.go index deecb07..ba7a7ac 100644 --- a/database/database.go +++ b/database/database.go @@ -19,10 +19,13 @@ import ( "log" "net/url" "strings" + "regexp" _ "github.com/mattn/go-sqlite3" ) +var languageCodeSanityRegex = regexp.MustCompile("^[a-zA-Z\\-0-9]+$") + func InitDB(filepath string) *sql.DB { db, err := sql.Open("sqlite3", filepath) if err != nil { @@ -95,17 +98,19 @@ query params: &order=score, &order=count */ +var emptyStringArray = []string{} + func SearchWordsByScore(db *sql.DB, words []string) []types.PageData { - return searchWords(db, words, true) + return SearchWords(db, words, true, emptyStringArray, emptyStringArray, emptyStringArray) } func SearchWordsBySite(db *sql.DB, words []string, domain string) []types.PageData { // search words by site is same as search words by score, but adds a domain condition - return searchWords(db, words, true, domain) + return SearchWords(db, words, true, []string{domain}, emptyStringArray, emptyStringArray) } func SearchWordsByCount(db *sql.DB, words []string) []types.PageData { - return searchWords(db, words, false) + return SearchWords(db, words, false, emptyStringArray, emptyStringArray, emptyStringArray) } func FulltextSearchWords(db *sql.DB, phrase string) []types.PageData { @@ -222,12 +227,16 @@ func countQuery(db *sql.DB, table string) int { return count } -func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...string) []types.PageData { - var wordlist []string +func SearchWords(db *sql.DB, words []string, searchByScore bool, domain []string, nodomain []string, language []string) []types.PageData { var args []interface{} - for _, word := range words { - wordlist = append(wordlist, "word = ?") - args = append(args, strings.ToLower(word)) + + wordlist := []string{"1"} + if len(words) > 0 && words[0] != "" { + wordlist = make([]string, 0) + for _, word := range words { + wordlist = append(wordlist, "word = ?") + args = append(args, strings.ToLower(word)) + } } // the domains conditional defaults to just 'true' i.e. no domain condition @@ -240,6 +249,28 @@ func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...strin } } + nodomains := []string{"1"} + if len(nodomain) > 0 && nodomain[0] != "" { + nodomains = make([]string, 0) + for _, d := range nodomain { + nodomains = append(nodomains, "domain != ?") + args = append(args, d) + } + } + + //This needs some wildcard support … + languages := []string{"1"} + if len(language) > 0 && language[0] != "" { + languages = make([]string, 0) + for _, d := range language { + // Do a little check to avoid the database being DOSed + if languageCodeSanityRegex.MatchString(d) { + languages = append(languages, "lang LIKE ?") + args = append(args, d+"%") + } + } + } + orderType := "SUM(score)" if !searchByScore { orderType = "COUNT(*)" @@ -250,11 +281,16 @@ func searchWords(db *sql.DB, words []string, searchByScore bool, domain ...strin FROM inv_index inv INNER JOIN pages p ON inv.url = p.url WHERE (%s) AND (%s) + AND (%s) + AND (%s) GROUP BY inv.url ORDER BY %s DESC LIMIT 15 - `, strings.Join(wordlist, " OR "), strings.Join(domains, " OR "), orderType) + `, strings.Join(wordlist, " OR "), strings.Join(domains, " OR "), strings.Join(nodomains, " AND "), strings.Join(languages, " OR "), orderType) + + fmt.Println(words) + fmt.Println(query) stmt, err := db.Prepare(query) util.Check(err) diff --git a/server/server.go b/server/server.go index 9d599ce..d52221e 100644 --- a/server/server.go +++ b/server/server.go @@ -7,7 +7,6 @@ import ( "net/http" "net/url" "os" - "regexp" "strings" "syscall" @@ -61,17 +60,21 @@ var templates = template.Must(template.ParseFiles( const useURLTitles = true -var sitePattern = regexp.MustCompile(`site:\S+`) - func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request) { var query string + var domain string view := &TemplateView{} - var domain string + var domains = []string{} + var nodomains = []string{} + var langs = []string{} + var queryFields = []string{} + if req.Method == http.MethodGet { params := req.URL.Query() if words, exists := params["q"]; exists && words[0] != "" { query = words[0] + queryFields = strings.Fields(query) } // how to use: https://gist.github.com/cblgh/29991ba0a9e65cccbe14f4afd7c975f1 @@ -80,29 +83,34 @@ func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request) domain = strings.TrimPrefix(parts[0], "https://") domain = strings.TrimPrefix(domain, "http://") domain = strings.TrimSuffix(domain, "/") - } else if sitePattern.MatchString(query) { - // if user searched with "site:" in text box, behave the same way as if a query param was used - domain = sitePattern.FindString(query)[5:] + domains = append(domains, domain) } - // if clear button was used -> clear site param / search text - if parts, exists := params["clear"]; exists && parts[0] != "" { - domain = "" - query = sitePattern.ReplaceAllString(query, "") + + var newQueryFields []string; + fmt.Println("Query Fields:", queryFields) + for _, word := range queryFields { + // This could be more efficient by splitting arrays, but I'm going with the more readable version for now + if strings.HasPrefix(word, "site:") { + domains = append(domains, strings.TrimPrefix(word, "site:")) + } else if strings.HasPrefix(word, "-site:") { + nodomains = append(nodomains, strings.TrimPrefix(word, "-site:")) + } else if strings.HasPrefix(word, "lang:") { + langs = append(langs, strings.TrimPrefix(word, "lang:")) + } else { + newQueryFields = append(newQueryFields, word) + } } + queryFields = newQueryFields; + } - if len(query) == 0 { + if len(queryFields) == 0 { view.Data = IndexData{Tagline: h.config.General.Tagline, Placeholder: h.config.General.Placeholder} h.renderView(res, "index", view) return } - var pages []types.PageData - if domain != "" { - pages = database.SearchWordsBySite(h.db, util.Inflect(strings.Fields(query)), domain) - } else { - pages = database.SearchWordsByScore(h.db, util.Inflect(strings.Fields(query))) - } + var pages = database.SearchWords(h.db, util.Inflect(queryFields), true, domains, nodomains, langs) if useURLTitles { for i, pageData := range pages {