kopia lustrzana https://github.com/Stopka/fedicrawl
Porównaj commity
2 Commity
c0b10f2e7a
...
8f42791ee2
Autor | SHA1 | Data |
---|---|---|
Štěpán Škorpil | 8f42791ee2 | |
Štěpán Škorpil | 59f49ce29d | |
|
@@ -1,5 +1,6 @@
|
|||
import axios, { AxiosRequestConfig, AxiosResponse } from 'axios'
|
||||
import robotsParser from 'robots-parser'
|
||||
import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds.js'
|
||||
import RobotsTxt from './RobotsTxt.js'
|
||||
import { RobotsTxtError } from './RobotsTxtError.js'
|
||||
|
||||
|
@@ -10,7 +11,10 @@ export default async function fetchRobotsTxt (domain: string): Promise<RobotsTxt
|
|||
const url = `https://${domain}/robots.txt`
|
||||
let content = ''
|
||||
try {
|
||||
-const robotsTxt = await axios.get(url)
|
||||
+const robotsTxt = await axios.get(url, {
|
||||
+headers: { 'User-Agent': userAgent },
|
||||
+timeout: getDefaultTimeoutMilliseconds()
|
||||
+})
|
||||
content = robotsTxt.data
|
||||
} catch (error) {
|
||||
console.info('Robots.txt not found', { error, url })
|
||||
|
|
|
@@ -13,7 +13,7 @@ export const refreshFeedsOnPage = async (
|
|||
robotsTxt: RobotsTxt
|
||||
): Promise<Feed[]> => {
|
||||
const feedData = await provider.retrieveFeeds(node.domain, page, robotsTxt)
|
||||
-const indexableFeedData = feedData.filter(item => item.indexable && !item.description.includes('#noindex'))
|
||||
+const indexableFeedData = feedData.filter(item => item.indexable && !item.description.includes('#nobot'))
|
||||
console.info('Retrieved feeds', {
|
||||
count: feedData.length,
|
||||
indexableCount: indexableFeedData.length,
|
||||
|
|
Ładowanie…
Reference in New Issue