kopia lustrzana https://github.com/cblgh/lieu
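Added a small check on the response status code so that pages which return errors (status 400 and above) or finish with a code in the 100 range are not indexed. Note that the condition as written, `StatusCode <= 100`, only matches codes up to and including 100 rather than the whole 1xx range.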
rodzic
34d6df3830
commit
ed5f5189b0
|
@ -294,6 +294,11 @@ func Crawl(config types.Config) {
|
||||||
|
|
||||||
// on every a element which has an href attribute, call callback
|
// on every a element which has an href attribute, call callback
|
||||||
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
|
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
|
||||||
|
|
||||||
|
if e.Response.StatusCode >= 400 || e.Response.StatusCode <= 100 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
link := getLink(e.Attr("href"))
|
link := getLink(e.Attr("href"))
|
||||||
if findSuffix(SUFFIXES, link) {
|
if findSuffix(SUFFIXES, link) {
|
||||||
return
|
return
|
||||||
|
|
Ładowanie…
Reference in New Issue