kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
97 wiersze
2.1 KiB
TypeScript
97 wiersze
2.1 KiB
TypeScript
import isRelativeUrlImpl from 'is-relative-url'
|
|
import normalizeUrlImpl, {
|
|
type Options as NormalizeUrlOptions
|
|
} from 'normalize-url'
|
|
import QuickLRU from 'quick-lru'
|
|
|
|
import { hashObject } from './utils.js'
|
|
|
|
// Protocols (as reported by `URL.protocol`, trailing colon included) that
// `isValidCrawlableUrl` accepts.
const protocolAllowList = new Set<string>(['https:', 'http:'])

// Bounded LRU cache of `normalizeUrl` results. Keys combine the input URL with
// a hash of the effective options; an empty-string value marks a URL that
// could not be normalized.
const normalizedUrlCache = new QuickLRU<string, string>({ maxSize: 4000 })
|
|
|
|
/**
 * Reports whether `url` is an absolute http(s) URL that can be normalized.
 *
 * @param url - Candidate URL string.
 * @returns `true` only when `url` is non-empty, not relative, parses with the
 * WHATWG `URL` constructor, uses an allow-listed protocol, and
 * `normalizeUrl` yields a non-empty result. Any thrown error is treated as
 * "not crawlable" and results in `false`.
 */
export function isValidCrawlableUrl(url: string): boolean {
  if (!url) {
    return false
  }

  try {
    if (isRelativeUrl(url)) {
      return false
    }

    // `new URL` throws on unparsable input, which the catch maps to `false`.
    const { protocol } = new URL(url)

    return protocolAllowList.has(protocol) && Boolean(normalizeUrl(url))
  } catch {
    return false
  }
}
|
|
|
|
/**
 * Reports whether `url` is a relative URL.
 *
 * Strings starting with `//` (protocol-relative URLs) are never reported as
 * relative here, whatever the underlying `is-relative-url` check says.
 *
 * @param url - Candidate URL string.
 * @returns `false` for empty or non-string input; otherwise whether the
 * underlying check considers `url` relative and it does not start with `//`.
 */
export function isRelativeUrl(url: string): boolean {
  if (typeof url !== 'string' || url.length === 0) {
    return false
  }

  const looksRelative = isRelativeUrlImpl(url)
  const isProtocolRelative = url.startsWith('//')

  return looksRelative && !isProtocolRelative
}
|
|
|
|
/**
 * Normalizes an absolute URL into a canonical string, memoizing the result.
 *
 * Results are stored in the module-level LRU cache under a key built from the
 * input URL and a hash of the effective options. Failures are cached too, as
 * an empty string, so the same bad URL is not re-processed on every call.
 *
 * @param url - URL to normalize. Empty and relative URLs are rejected.
 * @param options - Optional `normalize-url` options; they override the
 * defaults declared in this function.
 * @returns The normalized URL, or `undefined` when `url` is empty or relative,
 * or when normalization throws or produces an empty string.
 */
export function normalizeUrl(
  url: string,
  options?: NormalizeUrlOptions
): string | undefined {
  if (!url || isRelativeUrl(url)) {
    return undefined
  }

  const opts = {
    stripWWW: false,
    defaultProtocol: 'https',
    normalizeProtocol: true,
    forceHttps: false,
    stripHash: false,
    stripTextFragment: true,
    removeQueryParameters: [/^utm_\w+/i, 'ref', 'ref_src'],
    removeTrailingSlash: true,
    removeSingleSlash: true,
    removeExplicitPort: true,
    sortQueryParameters: true,
    ...options
  } as Required<NormalizeUrlOptions>

  const optionsHash = hashObject(opts)
  const cacheKey = `${url}-${optionsHash}`

  // A cached empty string is the sentinel for "previously failed to
  // normalize", so it must be distinguished from a cache miss (`undefined`).
  const cachedUrl = normalizedUrlCache.get(cacheKey)
  if (cachedUrl !== undefined) {
    return cachedUrl || undefined
  }

  let normalizedUrl = ''
  try {
    // Checks the *result* of normalization; the original mistakenly tested
    // the `normalizeUrl` function itself, which is always truthy.
    normalizedUrl = normalizeUrlImpl(url, opts) || ''
  } catch {
    // Ignore invalid URLs; the empty-string sentinel is cached below.
  }

  normalizedUrlCache.set(cacheKey, normalizedUrl)

  return normalizedUrl || undefined
}
|