diff --git a/.gitignore b/.gitignore
index 4ae5b97..0b898b7 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# Lieu
+data/
+searchengine.db
+
#~top ignores~
node_modules/
*.vim
diff --git a/README.md b/README.md
index 5aaa335..4e222ae 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
# Lieu
+
_an alternative search engine_
Created in response to the environs of apathy concerning the use of hypertext
@@ -10,11 +11,13 @@ engine, a way for personal webrings to increase serendipitous connexions.
## Goals
+
* Enable serendipitous discovery
* Support personal communities
* Be reusable, easily
## Usage
+
```
$ lieu help
Lieu: neighbourhood search engine
@@ -28,6 +31,7 @@ Commands
Example:
lieu precrawl > data/webring.txt
+ lieu crawl > data/crawled.txt
lieu ingest
lieu host
```
@@ -39,12 +43,13 @@ the files Lieu reads from, as defined in the config file. See below for a
typical workflow.
### Workflow
+
* Edit the config
* Add domains to crawl in `config.crawler.webring`
* **If you have a webpage with links you want to crawl:**
* Set the config's `url` field to that page
* Populate the list of domains to crawl with `precrawl`: `lieu precrawl > data/webring.txt`
-* Crawl: `lieu crawl > data/source.txt`
+* Crawl: `lieu crawl > data/crawled.txt`
* Create database: `lieu ingest`
* Host engine: `lieu host`
@@ -52,6 +57,7 @@ After ingesting the data with `lieu ingest`, you can also use lieu to search the
corpus in the terminal with `lieu search`.
## Config
+
The config file is written in [TOML](https://toml.io/en/).
```toml
@@ -85,6 +91,7 @@ boringDomains = "data/boring-domains.txt"
```
For your own use, the following config fields should be customized:
+
* `name`
-* `url `
+* `url`
* `port`
@@ -93,6 +100,7 @@ For your own use, the following config fields should be customized:
* `bannedDomains`
The following config-defined files can stay as-is unless you have specific requirements:
+
* `database`
* `heuristics`
* `wordlist`
@@ -102,5 +110,6 @@ For a full rundown of the files and their various jobs, see the [files
description](docs/files.md).
### License
+
Source code `AGPL-3.0-or-later`, Inter is available under `SIL OPEN FONT
LICENSE Version 1.1`, Noto Serif is licensed as `Apache License, Version 2.0`.
diff --git a/crawler/crawler.go b/crawler/crawler.go
index 99ac202..e9516e2 100644
--- a/crawler/crawler.go
+++ b/crawler/crawler.go
@@ -9,7 +9,7 @@ import (
"net/url"
"regexp"
"strings"
- "time"
+ "time"
"github.com/PuerkitoBio/goquery"
"github.com/gocolly/colly/v2"
@@ -85,7 +85,6 @@ func getDomains(links []string) []string {
return domains
}
-
func findSuffix(suffixes []string, query string) bool {
for _, suffix := range suffixes {
if strings.HasSuffix(strings.ToLower(query), suffix) {
@@ -208,8 +207,8 @@ func Crawl(config types.Config) {
c.AllowURLRevisit = false
c.DisallowedDomains = getBannedDomains(config.Crawler.BannedDomains)
- delay, _ := time.ParseDuration("200ms")
- c.Limit(&colly.LimitRule{DomainGlob: "*", Delay: delay, Parallelism: 3})
+ delay, _ := time.ParseDuration("200ms")
+ c.Limit(&colly.LimitRule{DomainGlob: "*", Delay: delay, Parallelism: 3})
boringDomains := getBoringDomains(config.Crawler.BoringDomains)
boringWords := getBoringWords(config.Crawler.BoringWords)
diff --git a/go.sum b/go.sum
index 7278df0..3f96fb3 100644
--- a/go.sum
+++ b/go.sum
@@ -66,6 +66,7 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
+github.com/yuin/goldmark v1.2.1 h1:ruQGxdhGHe7FWOJPT0mKs5+pD2Xs1Bm/kdGlHO04FmM=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
@@ -75,6 +76,7 @@ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -101,6 +103,8 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4 h1:myAQVi0cGEoqQVR5POX+8RR2mrocKqNN1hmeMqhX27k=
+golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
@@ -114,9 +118,14 @@ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBn
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20210114065538-d78b04bdf963/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.0 h1:po9/4sTYwZU9lPhi1tOrb4hCv3qrhiQ77LZfGa2OjwY=
+golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
+golang.org/x/tour v0.0.0-20210317163553-0a3a62c5e5c0 h1:u0bliLHgSO64Pb0xbhtwNIHspZc11X8M1bJqBkYl4Co=
+golang.org/x/tour v0.0.0-20210317163553-0a3a62c5e5c0/go.mod h1:bWzMdWN2SiLomDzvESYfljDnNu60fUM2ATO8j09tZ5Y=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
diff --git a/html/about-template.html b/html/about-template.html
deleted file mode 100644
index 263300d..0000000
--- a/html/about-template.html
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
-
- Lieu—webring search engine
-
-
-
-
- About
-
-
- the search for the new—endless
-
- Lieu —an alternative search engine. Created in response to the environs of
- apathy concerning the use of hypertext search and discovery. In Lieu , the
- internet is not what is made searchable, but instead one's own neighbourhood. Put differently,
- Lieu is a neighbourhood search engine, a way for personal webrings to increase
- serendipitous connexions.
-
-
- This instance indexes the {{.InstanceName}} —{{ .DomainCount }} domains,
- {{ .PageCount }} pages, {{ .TermCount }} search terms.
- Some domains of the webring have been filtered out for a better search experience,
- see the filtered list . Visit a random page .
-
- Lieu was created by cblgh at the onset of 2021.
- For Lieu's AGPL licensed source code, the repository .
-
-
-
diff --git a/html/about.html b/html/about.html
new file mode 100644
index 0000000..ce5019a
--- /dev/null
+++ b/html/about.html
@@ -0,0 +1,21 @@
+{{ template "head" . }}
+{{ template "nav" . }}
+
+
+ Lieu —an alternative search engine. Created in response to the environs of
+ apathy concerning the use of hypertext search and discovery. In Lieu , the
+ internet is not what is made searchable, but instead one's own neighbourhood. Put differently,
+ Lieu is a neighbourhood search engine, a way for personal webrings to increase
+ serendipitous connexions.
+
+
+ This instance indexes the {{ .Data.WebringName }} - {{ .Data.DomainCount }} domains,
+ {{ .Data.PageCount }} pages, {{ .Data.TermCount }} search terms.
+ Some domains of the webring have been filtered out for a better search experience,
+ see the filtered list .
+ Visit a random page .
+
+ Lieu was created by cblgh at the onset of 2021.
+ For Lieu's AGPL licensed source code, the repository .
+
+{{ template "footer" . }}
diff --git a/html/assets/NotoSerif-Bold.ttf b/html/assets/NotoSerif-Bold.ttf
old mode 100755
new mode 100644
diff --git a/html/assets/NotoSerif-Italic.ttf b/html/assets/NotoSerif-Italic.ttf
old mode 100755
new mode 100644
diff --git a/html/assets/NotoSerif-Regular.ttf b/html/assets/NotoSerif-Regular.ttf
old mode 100755
new mode 100644
diff --git a/html/assets/about.css b/html/assets/about.css
deleted file mode 100644
index 51ffbf0..0000000
--- a/html/assets/about.css
+++ /dev/null
@@ -1,24 +0,0 @@
-@import url("base.css");
-
-html {
- max-width: 31rem;
-}
-
-h1 {
- font-size: 3rem;
- margin-bottom: 0.5rem;
-}
-
-h2 {
- font-family: "Noto Serif";
- font-style: italic;
- font-weight: 400;
- font-size: 1.5rem;
- margin-top: 0;
- margin-bottom: 2rem;
-}
-
-.lieu {
- font-family: "Noto Serif";
- font-weight: 400;
-}
diff --git a/html/assets/base.css b/html/assets/base.css
index 6c76173..71d3224 100644
--- a/html/assets/base.css
+++ b/html/assets/base.css
@@ -42,9 +42,9 @@ html {
font-family: "Inter UI", sans-serif;
background: var(--secondary);
color: var(--primary);
- max-width: 650px;
padding-bottom: 2rem;
padding-left: 2rem;
+ padding-right: 2rem;
margin-top: 2rem;
}
@@ -141,7 +141,10 @@ and (max-device-width : 720px)
padding-left: 0.75rem;
padding-right: 0.75rem;
font-size: 30pt;
- max-width: 100vw;
+ max-width: 100vw !important;
+ }
+ #results {
+ display: grid;
}
}
diff --git a/html/assets/search.css b/html/assets/search.css
deleted file mode 100644
index 817d834..0000000
--- a/html/assets/search.css
+++ /dev/null
@@ -1,27 +0,0 @@
-@import url('base.css');
-
-main {
- columns: 2;
-}
-
-.entry {
- -webkit-column-break-inside: avoid;
- -moz-column-break-inside:avoid;
- -moz-page-break-inside:avoid;
- page-break-inside: avoid;
- break-inside: avoid-column;
-}
-
-.link {
- font-style: italic;
-}
-
-@media
-only screen
-and (min-device-width : 320px)
-and (max-device-width : 720px)
-{
- main {
- columns: 1 !important;
- }
-}
diff --git a/html/assets/startpage.css b/html/assets/startpage.css
deleted file mode 100644
index da87f3f..0000000
--- a/html/assets/startpage.css
+++ /dev/null
@@ -1,24 +0,0 @@
-@import url("about.css");
-
-html {
- max-width: 100vw;
-}
-
-h2 {
- margin-bottom: 1rem;
-}
-
-main {
- display: grid;
- justify-items: center;
- align-items: center;
- margin-top: 10rem;
-}
-
-.search-container {
- grid-template-columns: 19rem 3rem;
-}
-
-.lieu-container {
- justify-items: start;
-}
diff --git a/html/assets/style.css b/html/assets/style.css
new file mode 100644
index 0000000..cf5f98f
--- /dev/null
+++ b/html/assets/style.css
@@ -0,0 +1,106 @@
+@import url("base.css");
+
+h1 {
+ font-size: 3rem;
+ margin-bottom: 0rem;
+}
+
+h2 {
+ font-family: "Noto Serif";
+ font-weight: 400;
+ font-size: 1.5rem;
+ margin-top: 0;
+ margin-bottom: 1rem;
+}
+
+.lieu-container h2 {
+ font-style: italic;
+}
+
+header {
+ clear: both;
+ display: grid;
+ grid-auto-flow: column;
+ grid-template-columns: max-content max-content 1fr;
+ grid-column-gap: 1rem;
+ align-items: start;
+}
+
+header h2 a, header h2 a:hover {
+ border-bottom: none;
+}
+
+header ul {
+ justify-self: end;
+ margin-top: 0.5rem;
+ grid-column-start: 3;
+}
+
+header ul li {
+ margin-left: 1.5rem;
+ display: inline-block;
+}
+
+header ul li:first-of-type {
+ margin-left: 0;
+}
+
+main {
+ display: grid;
+ justify-items: left;
+ align-items: start; /* block-axis alignment takes start/center/end, not left/right */
+ margin-top: 1rem;
+}
+
+main#results {
+ display: block;
+ margin-top: 4rem;
+ columns: 2;
+ max-width: 1200px;
+}
+
+main#about {
+ max-width: 600px;
+}
+
+.lieu {
+ font-family: "Noto Serif";
+ font-weight: 400;
+}
+
+.search-container {
+ grid-template-columns: 19rem 3rem;
+}
+
+.lieu-container {
+ display: grid;
+ justify-items: center;
+ align-items: center;
+ margin-top: 5rem;
+ width: 100%;
+}
+
+.entry {
+ -webkit-column-break-inside: avoid;
+ -moz-column-break-inside:avoid;
+ -moz-page-break-inside:avoid;
+ page-break-inside: avoid;
+ break-inside: avoid-column;
+ margin-bottom: 1rem;
+}
+
+.entry p {
+ color: var(--primary);
+ opacity: 0.45;
+}
+
+.link {
+ font-style: italic;
+}
+
+@media only screen and (min-device-width : 320px) and (max-device-width : 720px) {
+ main {
+ columns: 1 !important;
+ }
+}
+
diff --git a/html/footer.html b/html/footer.html
new file mode 100644
index 0000000..f2e2d31
--- /dev/null
+++ b/html/footer.html
@@ -0,0 +1,4 @@
+{{ define "footer" }}
+