kopia lustrzana https://github.com/cblgh/lieu
Added a small response-code check so that pages which return errors, or which respond with codes in the 100 range, are not indexed
rodzic
34d6df3830
commit
ed5f5189b0
|
@@ -294,6 +294,11 @@ func Crawl(config types.Config) {
|
|||
|
||||
// on every a element which has an href attribute, call callback
|
||||
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
|
||||
|
||||
if e.Response.StatusCode >= 400 || e.Response.StatusCode <= 100 {
|
||||
return
|
||||
}
|
||||
|
||||
link := getLink(e.Attr("href"))
|
||||
if findSuffix(SUFFIXES, link) {
|
||||
return
|
||||
|
|
Ładowanie…
Reference in New Issue