Porównaj commity

...

2 Commity

Autor SHA1 Wiadomość Data
Štěpán Škorpil 8f42791ee2 Added timeout to robots.txt fetching 2022-11-22 18:52:24 +01:00
Štěpán Škorpil 59f49ce29d Changed tag from noindex to nobot 2022-11-22 18:39:05 +01:00
2 zmienione pliki z 6 dodaniami i 2 usunięciami

Wyświetl plik

@@ -1,5 +1,6 @@
import axios, { AxiosRequestConfig, AxiosResponse } from 'axios'
import robotsParser from 'robots-parser'
+import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds.js'
import RobotsTxt from './RobotsTxt.js'
import { RobotsTxtError } from './RobotsTxtError.js'
@@ -10,7 +11,10 @@ export default async function fetchRobotsTxt (domain: string): Promise<RobotsTxt
const url = `https://${domain}/robots.txt`
let content = ''
try {
-const robotsTxt = await axios.get(url)
+const robotsTxt = await axios.get(url, {
+headers: { 'User-Agent': userAgent },
+timeout: getDefaultTimeoutMilliseconds()
+})
content = robotsTxt.data
} catch (error) {
console.info('Robots.txt not found', { error, url })

Wyświetl plik

@@ -13,7 +13,7 @@ export const refreshFeedsOnPage = async (
robotsTxt: RobotsTxt
): Promise<Feed[]> => {
const feedData = await provider.retrieveFeeds(node.domain, page, robotsTxt)
-const indexableFeedData = feedData.filter(item => item.indexable && !item.description.includes('#noindex'))
+const indexableFeedData = feedData.filter(item => item.indexable && !item.description.includes('#nobot'))
console.info('Retrieved feeds', {
count: feedData.length,
indexableCount: indexableFeedData.length,