# Mirror of https://github.com/cblgh/lieu (32 lines, 1.2 KiB, TOML)
[general]
name = "Sweet Webring"
tagline = "the search for the new—endless"
# NOTE(review): presumably the hint text shown in the search input — confirm
placeholder = "Search"
# used by the precrawl command and linked to from the /about route
url = "https://example.com/"
# NOTE(review): presumably the port the web server listens on — confirm
port = 10001

[data]
# the source file should contain the crawl command's output
source = "data/crawled.txt"
# location & name of the sqlite database
database = "data/searchengine.db"
# words and phrases that disqualify scraped paragraphs from being presented in search results
heuristics = "data/heuristics.txt"
# stop words, as they are known in the search engine business: https://en.wikipedia.org/wiki/Stop_word
wordlist = "data/wordlist.txt"

[crawler]
# manually curated list of domains, or the output of the precrawl command
webring = "data/webring.txt"
# domains that are banned from being crawled but might originally be part of the webring
bannedDomains = "data/banned-domains.txt"
# file suffixes that are banned from being crawled
bannedSuffixes = "data/banned-suffixes.txt"
# phrases and words which won't be scraped (e.g. if contained in a link)
boringWords = "data/boring-words.txt"
# domains that won't be output as outgoing links
boringDomains = "data/boring-domains.txt"
# queries to search for finding preview text
previewQueryList = "data/preview-query-list.txt"