kopia lustrzana https://github.com/cblgh/lieu
wip fts
rodzic
34b4978895
commit
793a9867cc
|
@ -69,6 +69,7 @@ func createTables(db *sql.DB) {
|
|||
url TEXT NOT NULL,
|
||||
FOREIGN KEY(url) REFERENCES pages(url)
|
||||
)`,
|
||||
`CREATE VIRTUAL TABLE IF NOT EXISTS external_links USING fts5 (url, tokenize="trigram")`,
|
||||
}
|
||||
|
||||
for _, query := range queries {
|
||||
|
@ -98,6 +99,29 @@ func SearchWordsByCount(db *sql.DB, words []string) []types.PageData {
|
|||
return searchWords(db, words, false)
|
||||
}
|
||||
|
||||
func FulltextSearchWords(db *sql.DB, phrase string) []types.PageData {
|
||||
query := fmt.Sprintf(`SELECT url from external_links WHERE url MATCH ? GROUP BY url ORDER BY RANDOM() LIMIT 30`)
|
||||
|
||||
stmt, err := db.Prepare(query)
|
||||
util.Check(err)
|
||||
defer stmt.Close()
|
||||
|
||||
rows, err := stmt.Query(phrase)
|
||||
util.Check(err)
|
||||
defer rows.Close()
|
||||
|
||||
var pageData types.PageData
|
||||
var pages []types.PageData
|
||||
for rows.Next() {
|
||||
if err := rows.Scan(&pageData.URL); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
pageData.Title = pageData.URL
|
||||
pages = append(pages, pageData)
|
||||
}
|
||||
return pages
|
||||
}
|
||||
|
||||
func GetDomainCount(db *sql.DB) int {
|
||||
return countQuery(db, "domains")
|
||||
}
|
||||
|
@ -123,6 +147,19 @@ func GetRandomDomain(db *sql.DB) string {
|
|||
return domain
|
||||
}
|
||||
|
||||
func GetRandomExternalLink(db *sql.DB) string {
|
||||
rows, err := db.Query("SELECT url FROM external_links ORDER BY RANDOM() LIMIT 1;")
|
||||
util.Check(err)
|
||||
defer rows.Close()
|
||||
|
||||
var link string
|
||||
for rows.Next() {
|
||||
err = rows.Scan(&link)
|
||||
util.Check(err)
|
||||
}
|
||||
return link
|
||||
}
|
||||
|
||||
func GetRandomPage(db *sql.DB) string {
|
||||
domain := GetRandomDomain(db)
|
||||
stmt, err := db.Prepare("SELECT url FROM pages WHERE domain = ? ORDER BY RANDOM() LIMIT 1;")
|
||||
|
@ -242,3 +279,17 @@ func InsertManyWords(db *sql.DB, batch []types.SearchFragment) {
|
|||
_, err := db.Exec(stmt, args...)
|
||||
util.Check(err)
|
||||
}
|
||||
|
||||
func InsertManyExternalLinks(db *sql.DB, externalLinks []string) {
|
||||
values := make([]string, 0, len(externalLinks))
|
||||
args := make([]interface{}, 0, len(externalLinks))
|
||||
|
||||
for _, externalLink := range externalLinks {
|
||||
values = append(values, "(?)")
|
||||
args = append(args, externalLink)
|
||||
}
|
||||
|
||||
stmt := fmt.Sprintf(`INSERT OR IGNORE INTO external_links(url) VALUES %s`, strings.Join(values, ","))
|
||||
_, err := db.Exec(stmt, args...)
|
||||
util.Check(err)
|
||||
}
|
||||
|
|
|
@ -80,6 +80,7 @@ func Ingest(config types.Config) {
|
|||
var count int
|
||||
var batchsize = 100
|
||||
batch := make([]types.SearchFragment, 0, 0)
|
||||
var externalLinks []string
|
||||
|
||||
scanner := bufio.NewScanner(buf)
|
||||
for scanner.Scan() {
|
||||
|
@ -141,6 +142,8 @@ func Ingest(config types.Config) {
|
|||
page.Lang = rawdata
|
||||
case "keywords":
|
||||
processed = strings.Split(strings.ReplaceAll(payload, ", ", ","), ",")
|
||||
case "non-webring-link":
|
||||
externalLinks = append(externalLinks, payload)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
@ -162,7 +165,8 @@ func Ingest(config types.Config) {
|
|||
}
|
||||
|
||||
if len(pages) > batchsize {
|
||||
ingestBatch(db, batch, pages)
|
||||
ingestBatch(db, batch, pages, externalLinks)
|
||||
externalLinks = make([]string, 0, 0)
|
||||
batch = make([]types.SearchFragment, 0, 0)
|
||||
// TODO: make sure we don't partially insert any page data
|
||||
pages = make(map[string]types.PageData)
|
||||
|
@ -174,7 +178,7 @@ func Ingest(config types.Config) {
|
|||
util.Check(err)
|
||||
}
|
||||
|
||||
func ingestBatch(db *sql.DB, batch []types.SearchFragment, pageMap map[string]types.PageData) {
|
||||
func ingestBatch(db *sql.DB, batch []types.SearchFragment, pageMap map[string]types.PageData, links []string) {
|
||||
pages := make([]types.PageData, len(pageMap))
|
||||
i := 0
|
||||
for k := range pageMap {
|
||||
|
@ -185,6 +189,7 @@ func ingestBatch(db *sql.DB, batch []types.SearchFragment, pageMap map[string]ty
|
|||
database.InsertManyDomains(db, pages)
|
||||
database.InsertManyPages(db, pages)
|
||||
database.InsertManyWords(db, batch)
|
||||
database.InsertManyExternalLinks(db, links)
|
||||
log.Println("finished ingesting batch")
|
||||
}
|
||||
|
||||
|
|
|
@ -89,6 +89,35 @@ func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request)
|
|||
h.renderView(res, "search", view)
|
||||
}
|
||||
|
||||
func (h RequestHandler) externalSearchRoute(res http.ResponseWriter, req *http.Request) {
|
||||
var query string
|
||||
view := &TemplateView{}
|
||||
|
||||
if req.Method == http.MethodGet {
|
||||
params := req.URL.Query()
|
||||
if words, exists := params["q"]; exists && words[0] != "" {
|
||||
query = words[0]
|
||||
}
|
||||
}
|
||||
|
||||
pages := database.FulltextSearchWords(h.db, query)
|
||||
|
||||
if useURLTitles {
|
||||
for i, pageData := range pages {
|
||||
prettyURL, err := url.QueryUnescape(strings.TrimPrefix(strings.TrimPrefix(pageData.URL, "http://"), "https://"))
|
||||
util.Check(err)
|
||||
pageData.Title = prettyURL
|
||||
pages[i] = pageData
|
||||
}
|
||||
}
|
||||
|
||||
view.Data = SearchData{
|
||||
Query: query,
|
||||
Pages: pages,
|
||||
}
|
||||
h.renderView(res, "search", view)
|
||||
}
|
||||
|
||||
func (h RequestHandler) aboutRoute(res http.ResponseWriter, req *http.Request) {
|
||||
view := &TemplateView{}
|
||||
|
||||
|
@ -133,6 +162,11 @@ func (h RequestHandler) randomRoute(res http.ResponseWriter, req *http.Request)
|
|||
http.Redirect(res, req, link, http.StatusSeeOther)
|
||||
}
|
||||
|
||||
func (h RequestHandler) randomExternalRoute(res http.ResponseWriter, req *http.Request) {
|
||||
link := database.GetRandomExternalLink(h.db)
|
||||
http.Redirect(res, req, link, http.StatusSeeOther)
|
||||
}
|
||||
|
||||
func (h RequestHandler) webringRoute(res http.ResponseWriter, req *http.Request) {
|
||||
http.Redirect(res, req, h.config.General.URL, http.StatusSeeOther)
|
||||
}
|
||||
|
@ -157,6 +191,8 @@ func Serve(config types.Config) {
|
|||
|
||||
http.HandleFunc("/about", handler.aboutRoute)
|
||||
http.HandleFunc("/", handler.searchRoute)
|
||||
http.HandleFunc("/external", handler.externalSearchRoute)
|
||||
http.HandleFunc("/random/external", handler.randomExternalRoute)
|
||||
http.HandleFunc("/random", handler.randomRoute)
|
||||
http.HandleFunc("/webring", handler.webringRoute)
|
||||
http.HandleFunc("/filtered", handler.filteredRoute)
|
||||
|
|
Ładowanie…
Reference in New Issue