Added domain validation

main
Štěpán Škorpil 2022-12-10 15:11:56 +01:00
rodzic 1cbd5df5ae
commit 425abd5af0
3 zmienionych plików z 22 dodań i 2 usunięć

Wyświetl plik

@ -4,6 +4,7 @@ import { NodeProvider } from '../../Fediverse/Providers/NodeProvider'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
import isDomainNotBanned from '../../Storage/Nodes/isDomainNotBanned'
import isDomainValid from '../../Storage/Nodes/isDomainValid.js'
export const findNewNodesOnPage = async (
elastic: ElasticClient,
@ -13,7 +14,9 @@ export const findNewNodesOnPage = async (
robotsTxt: RobotsTxt
): Promise<number> => {
let domains = await provider.retrieveNodes(node.domain, page, robotsTxt)
domains = domains.filter(isDomainNotBanned)
domains = domains.filter(
(domain: string): boolean => isDomainValid(domain) && isDomainNotBanned(domain)
)
console.log('Found nodes', {
count: domains.length,
domain: node.domain,

Wyświetl plik

@ -1,6 +1,7 @@
import fetchRobotsTxt from '../Fediverse/RobotsTxt/fetchRobotsTxt.js'
import { fetchNodeToProcess } from '../Storage/Nodes/fetchNodeToProcess'
import { ProviderRegistry } from '../Fediverse/Providers/ProviderRegistry'
import isDomainValid from '../Storage/Nodes/isDomainValid.js'
import { setNodeRefreshed } from '../Storage/Nodes/setNodeRefreshed'
import batchPromises from '../Utils/batchPromises.js'
import { refreshNodeInfo } from './NodeInfo/refreshNodeInfo'
@ -13,6 +14,7 @@ import { deleteOldFeeds } from '../Storage/Feeds/deleteOldFeeds'
import refreshNodeIps from './Dns/refreshNodeIps'
import { ElasticClient } from '../Storage/ElasticClient'
import updateNodeFeedStats from './Nodes/updateNodeFeedStats'
import deleteDomains from './Seed/deleteBannedNodes.js'
export const processNextNode = async (
elastic: ElasticClient,
@ -21,7 +23,12 @@ export const processNextNode = async (
console.info('#############################################')
let node = await fetchNodeToProcess(elastic)
node = await setNodeRefreshAttempted(elastic, node)
// TODO remove check later
if (!isDomainValid(node.domain)) {
console.info('Node domain is invalid, deleting node', { domain: node.domain })
await deleteDomains(elastic, [node.domain])
return
}
node = await refreshNodeIps(elastic, node)
const robotsTxt = await fetchRobotsTxt(node.domain)
node = await refreshNodeInfo(elastic, node, robotsTxt)

Wyświetl plik

@ -0,0 +1,10 @@
/**
 * Checks whether a string is usable as the host part of an `https://` URL.
 *
 * The value is rejected when it contains whitespace or any character that
 * would end the host component or introduce userinfo (`/`, `\`, `?`, `#`,
 * `@`). Without this pre-check, inputs such as `example.com/path` or
 * `user@example.com` would still produce a parseable URL and be accepted,
 * and tabs/newlines would be silently stripped by the URL parser.
 * The remaining validation (empty host, forbidden host code points, bad port)
 * is delegated to the `URL` constructor.
 *
 * @param domain - candidate host, optionally with a `:port` suffix
 * @returns `true` when the value is a valid bare host, `false` otherwise
 *   (an info message is logged for every rejected value)
 */
export default function isDomainValid (domain: string): boolean {
  // Anything that would leak out of the host component makes the value invalid.
  let valid = !/[\s/\\?#@]/.test(domain)
  if (valid) {
    try {
      // The constructor is used only for its validation side effect.
      // eslint-disable-next-line no-new
      new URL(`https://${domain}/`)
    } catch {
      valid = false
    }
  }
  if (!valid) {
    console.info('Domain is invalid', { domain })
  }
  return valid
}