kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: improvements to clearbit, diffbot, proxycurl, scraper clients
rodzic
1fce6eec58
commit
a851965722
|
@ -1,11 +1,12 @@
|
||||||
import defaultKy from 'ky'
|
import defaultKy from 'ky'
|
||||||
import pThrottle from 'p-throttle'
|
import pThrottle from 'p-throttle'
|
||||||
|
|
||||||
import type { DeepNullable, KyInstance } from '../types.js'
|
import type { KyInstance } from '../types.js'
|
||||||
import {
|
import {
|
||||||
assert,
|
assert,
|
||||||
delay,
|
delay,
|
||||||
getEnv,
|
getEnv,
|
||||||
|
pruneNullOrUndefinedDeep,
|
||||||
sanitizeSearchParams,
|
sanitizeSearchParams,
|
||||||
throttleKy
|
throttleKy
|
||||||
} from '../utils.js'
|
} from '../utils.js'
|
||||||
|
@ -37,7 +38,7 @@ export namespace clearbit {
|
||||||
phoneNumbers: string[]
|
phoneNumbers: string[]
|
||||||
emailAddresses: string[]
|
emailAddresses: string[]
|
||||||
}
|
}
|
||||||
category: {
|
category: Partial<{
|
||||||
sector: string
|
sector: string
|
||||||
industryGroup: string
|
industryGroup: string
|
||||||
industry: string
|
industry: string
|
||||||
|
@ -48,14 +49,14 @@ export namespace clearbit {
|
||||||
naicsCode: string
|
naicsCode: string
|
||||||
naics6Codes: string[]
|
naics6Codes: string[]
|
||||||
naics6Codes2022: string[]
|
naics6Codes2022: string[]
|
||||||
}
|
}>
|
||||||
tags: string[]
|
tags: string[]
|
||||||
description: string
|
description: string
|
||||||
foundedYear: number
|
foundedYear: number
|
||||||
location: string
|
location: string
|
||||||
timeZone: string
|
timeZone: string
|
||||||
utcOffset: number
|
utcOffset: number
|
||||||
geo: {
|
geo: Partial<{
|
||||||
streetNumber: string
|
streetNumber: string
|
||||||
streetName: string
|
streetName: string
|
||||||
subPremise: string
|
subPremise: string
|
||||||
|
@ -68,16 +69,16 @@ export namespace clearbit {
|
||||||
countryCode: string
|
countryCode: string
|
||||||
lat: number
|
lat: number
|
||||||
lng: number
|
lng: number
|
||||||
}
|
}>
|
||||||
logo: string
|
logo: string
|
||||||
facebook: {
|
facebook: Partial<{
|
||||||
handle: string
|
handle: string
|
||||||
likes: number
|
likes: number
|
||||||
}
|
}>
|
||||||
linkedin: {
|
linkedin: {
|
||||||
handle: string
|
handle: string
|
||||||
}
|
}
|
||||||
twitter: {
|
twitter: Partial<{
|
||||||
handle: string
|
handle: string
|
||||||
id: string
|
id: string
|
||||||
bio: string
|
bio: string
|
||||||
|
@ -86,30 +87,30 @@ export namespace clearbit {
|
||||||
location: string
|
location: string
|
||||||
site: string
|
site: string
|
||||||
avatar: string
|
avatar: string
|
||||||
}
|
}>
|
||||||
crunchbase: {
|
crunchbase: {
|
||||||
handle: string
|
handle: string
|
||||||
}
|
}
|
||||||
emailProvider: boolean
|
emailProvider: boolean
|
||||||
type: string
|
type: string
|
||||||
ticker: string
|
ticker: string
|
||||||
identifiers: {
|
identifiers: Partial<{
|
||||||
usEIN: string
|
usEIN: string
|
||||||
usCIK: string
|
usCIK: string
|
||||||
}
|
}>
|
||||||
phone: string
|
phone: string
|
||||||
metrics: {
|
metrics: Partial<{
|
||||||
alexaUsRank: number
|
alexaUsRank: number
|
||||||
alexaGlobalRank: number
|
alexaGlobalRank: number
|
||||||
trafficRank: string
|
trafficRank: string
|
||||||
employees: number
|
employees: number
|
||||||
employeesRange: string
|
employeesRange: string
|
||||||
marketCap: string
|
marketCap: number
|
||||||
raised: number
|
raised: number
|
||||||
annualRevenue: string
|
annualRevenue: string
|
||||||
estimatedAnnualRevenue: string
|
estimatedAnnualRevenue: string
|
||||||
fiscalYearEnd: string
|
fiscalYearEnd: number
|
||||||
}
|
}>
|
||||||
indexedAt: string
|
indexedAt: string
|
||||||
tech: string[]
|
tech: string[]
|
||||||
techCategories: string[]
|
techCategories: string[]
|
||||||
|
@ -121,18 +122,18 @@ export namespace clearbit {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export type EmailLookupResponse = DeepNullable<{
|
export type EmailLookupResponse = Partial<{
|
||||||
id: string
|
id: string
|
||||||
name: {
|
name: Partial<{
|
||||||
fullName: string
|
fullName: string
|
||||||
givenName: string
|
givenName: string
|
||||||
familyName: string
|
familyName: string
|
||||||
}
|
}>
|
||||||
email: string
|
email: string
|
||||||
location: string
|
location: string
|
||||||
timeZone: string
|
timeZone: string
|
||||||
utcOffset: number
|
utcOffset: number
|
||||||
geo: {
|
geo: Partial<{
|
||||||
city: string
|
city: string
|
||||||
state: string
|
state: string
|
||||||
stateCode: string
|
stateCode: string
|
||||||
|
@ -140,22 +141,22 @@ export namespace clearbit {
|
||||||
countryCode: string
|
countryCode: string
|
||||||
lat: number
|
lat: number
|
||||||
lng: number
|
lng: number
|
||||||
}
|
}>
|
||||||
bio: string
|
bio: string
|
||||||
site: string
|
site: string
|
||||||
avatar: string
|
avatar: string
|
||||||
employment: {
|
employment: Partial<{
|
||||||
domain: string
|
domain: string
|
||||||
name: string
|
name: string
|
||||||
title: string
|
title: string
|
||||||
role: string
|
role: string
|
||||||
subRole: string
|
subRole: string
|
||||||
seniority: string
|
seniority: string
|
||||||
}
|
}>
|
||||||
facebook: {
|
facebook: {
|
||||||
handle: string
|
handle: string
|
||||||
}
|
}
|
||||||
github: {
|
github: Partial<{
|
||||||
handle: string
|
handle: string
|
||||||
id: string
|
id: string
|
||||||
avatar: string
|
avatar: string
|
||||||
|
@ -163,8 +164,8 @@ export namespace clearbit {
|
||||||
blog: string
|
blog: string
|
||||||
followers: number
|
followers: number
|
||||||
following: number
|
following: number
|
||||||
}
|
}>
|
||||||
twitter: {
|
twitter: Partial<{
|
||||||
handle: string
|
handle: string
|
||||||
id: string
|
id: string
|
||||||
bio: string
|
bio: string
|
||||||
|
@ -175,14 +176,14 @@ export namespace clearbit {
|
||||||
location: string
|
location: string
|
||||||
site: string
|
site: string
|
||||||
avatar: string
|
avatar: string
|
||||||
}
|
}>
|
||||||
linkedin: {
|
linkedin: {
|
||||||
handle: string
|
handle: string
|
||||||
}
|
}
|
||||||
googleplus: {
|
googleplus: {
|
||||||
handle: null
|
handle: null
|
||||||
}
|
}
|
||||||
gravatar: {
|
gravatar: Partial<{
|
||||||
handle: string
|
handle: string
|
||||||
urls: {
|
urls: {
|
||||||
value: string
|
value: string
|
||||||
|
@ -193,7 +194,7 @@ export namespace clearbit {
|
||||||
url: string
|
url: string
|
||||||
type: string
|
type: string
|
||||||
}[]
|
}[]
|
||||||
}
|
}>
|
||||||
fuzzy: boolean
|
fuzzy: boolean
|
||||||
emailProvider: boolean
|
emailProvider: boolean
|
||||||
indexedAt: string
|
indexedAt: string
|
||||||
|
@ -204,7 +205,7 @@ export namespace clearbit {
|
||||||
|
|
||||||
export type CompanyResponse = {
|
export type CompanyResponse = {
|
||||||
id: string
|
id: string
|
||||||
} & DeepNullable<CompanyNullableProps>
|
} & Partial<CompanyNullableProps>
|
||||||
|
|
||||||
export interface CompanySearchOptions {
|
export interface CompanySearchOptions {
|
||||||
/**
|
/**
|
||||||
|
@ -260,17 +261,17 @@ export namespace clearbit {
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface EmploymentAttributes {
|
export interface EmploymentAttributes {
|
||||||
company: string
|
company?: string
|
||||||
domain: string
|
domain?: string
|
||||||
linkedin: string
|
linkedin?: string
|
||||||
title: string
|
title?: string
|
||||||
role: string
|
role?: string
|
||||||
subRole: string
|
subRole?: string
|
||||||
seniority: string
|
seniority?: string
|
||||||
startDate: string
|
startDate?: string
|
||||||
endDate: string
|
endDate?: string
|
||||||
present: boolean
|
present?: boolean
|
||||||
highlight: boolean
|
highlight?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface EmailAttributes {
|
export interface EmailAttributes {
|
||||||
|
@ -291,7 +292,7 @@ export namespace clearbit {
|
||||||
|
|
||||||
export type PersonAttributesV2 = {
|
export type PersonAttributesV2 = {
|
||||||
id: string
|
id: string
|
||||||
} & DeepNullable<{
|
} & Partial<{
|
||||||
name: Name
|
name: Name
|
||||||
avatar: string
|
avatar: string
|
||||||
location: string
|
location: string
|
||||||
|
@ -554,31 +555,37 @@ export class ClearbitClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
async companyEnrichment(options: clearbit.CompanyEnrichmentOptions) {
|
async companyEnrichment(options: clearbit.CompanyEnrichmentOptions) {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('https://company-stream.clearbit.com/v2/companies/find', {
|
.get('https://company-stream.clearbit.com/v2/companies/find', {
|
||||||
searchParams: sanitizeSearchParams(options)
|
searchParams: sanitizeSearchParams(options)
|
||||||
})
|
})
|
||||||
.json<clearbit.CompanyResponse>()
|
.json<clearbit.CompanyResponse>()
|
||||||
|
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
async companySearch(options: clearbit.CompanySearchOptions) {
|
async companySearch(options: clearbit.CompanySearchOptions) {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('https://discovery.clearbit.com/v1/companies/search', {
|
.get('https://discovery.clearbit.com/v1/companies/search', {
|
||||||
searchParams: sanitizeSearchParams(options)
|
searchParams: sanitizeSearchParams(options)
|
||||||
})
|
})
|
||||||
.json<clearbit.CompanySearchResponse>()
|
.json<clearbit.CompanySearchResponse>()
|
||||||
|
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
async companyAutocomplete(name: string) {
|
async companyAutocomplete(name: string) {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('https://autocomplete.clearbit.com/v1/companies/suggest', {
|
.get('https://autocomplete.clearbit.com/v1/companies/suggest', {
|
||||||
searchParams: { query: name }
|
searchParams: { query: name }
|
||||||
})
|
})
|
||||||
.json<clearbit.BasicCompanyResponse[]>()
|
.json<clearbit.BasicCompanyResponse[]>()
|
||||||
|
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
async prospectorPeopleV2(options: clearbit.PeopleSearchOptionsV2) {
|
async prospectorPeopleV2(options: clearbit.PeopleSearchOptionsV2) {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('https://prospector.clearbit.com/v2/people/search', {
|
.get('https://prospector.clearbit.com/v2/people/search', {
|
||||||
searchParams: sanitizeSearchParams({
|
searchParams: sanitizeSearchParams({
|
||||||
...options,
|
...options,
|
||||||
|
@ -589,10 +596,12 @@ export class ClearbitClient {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.json<clearbit.ProspectorResponseV2>()
|
.json<clearbit.ProspectorResponseV2>()
|
||||||
|
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
async prospectorPeopleV1(options: clearbit.PeopleSearchOptionsV1) {
|
async prospectorPeopleV1(options: clearbit.PeopleSearchOptionsV1) {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('https://prospector.clearbit.com/v1/people/search', {
|
.get('https://prospector.clearbit.com/v1/people/search', {
|
||||||
searchParams: sanitizeSearchParams({
|
searchParams: sanitizeSearchParams({
|
||||||
email: false,
|
email: false,
|
||||||
|
@ -604,6 +613,8 @@ export class ClearbitClient {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.json<clearbit.ProspectorResponseV1>()
|
.json<clearbit.ProspectorResponseV1>()
|
||||||
|
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO Status code = 202 means the response was queued.
|
// TODO Status code = 202 means the response was queued.
|
||||||
|
@ -622,7 +633,8 @@ export class ClearbitClient {
|
||||||
})
|
})
|
||||||
|
|
||||||
if (response.status !== 202 || !maxRetries) {
|
if (response.status !== 202 || !maxRetries) {
|
||||||
return response.json<clearbit.EmailLookupResponse>()
|
const res = await response.json<clearbit.EmailLookupResponse>()
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maxRetries && response.status === 202) {
|
if (maxRetries && response.status === 202) {
|
||||||
|
@ -637,7 +649,8 @@ export class ClearbitClient {
|
||||||
count++
|
count++
|
||||||
running = response.status === 202
|
running = response.status === 202
|
||||||
}
|
}
|
||||||
return response.json<clearbit.EmailLookupResponse>()
|
const res = await response.json<clearbit.EmailLookupResponse>()
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error('clearbit email lookup error 202', { cause: response })
|
throw new Error('clearbit email lookup error 202', { cause: response })
|
||||||
|
@ -653,17 +666,21 @@ export class ClearbitClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
async revealCompanyFromIP(ip: string) {
|
async revealCompanyFromIP(ip: string) {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('https://reveal.clearbit.com/v1/companies/find', {
|
.get('https://reveal.clearbit.com/v1/companies/find', {
|
||||||
searchParams: { ip }
|
searchParams: { ip }
|
||||||
})
|
})
|
||||||
.json<clearbit.CompanyRevealResponse>()
|
.json<clearbit.CompanyRevealResponse>()
|
||||||
.catch((_) => undefined)
|
.catch((_) => undefined)
|
||||||
|
|
||||||
|
if (res) {
|
||||||
|
return pruneNullOrUndefinedDeep(res)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static filterEmploymentProspectorV2(
|
static filterEmploymentProspectorV2(
|
||||||
companyName: string,
|
companyName: string,
|
||||||
employments: Array<DeepNullable<clearbit.EmploymentAttributes> | null> | null
|
employments?: Array<Partial<clearbit.EmploymentAttributes>>
|
||||||
) {
|
) {
|
||||||
if (employments && employments.length > 0) {
|
if (employments && employments.length > 0) {
|
||||||
// We filter by employment endDate because some people could have multiple
|
// We filter by employment endDate because some people could have multiple
|
||||||
|
|
|
@ -384,12 +384,14 @@ export namespace diffbot {
|
||||||
nbIncomingEdges?: number
|
nbIncomingEdges?: number
|
||||||
nbFollowers?: number
|
nbFollowers?: number
|
||||||
nbLocations?: number
|
nbLocations?: number
|
||||||
|
nbEmployees?: number
|
||||||
nbEmployeesMin?: number
|
nbEmployeesMin?: number
|
||||||
nbEmployeesMax?: number
|
nbEmployeesMax?: number
|
||||||
nbActiveEmployeeEdges?: number
|
nbActiveEmployeeEdges?: number
|
||||||
nbUniqueInvestors?: number
|
nbUniqueInvestors?: number
|
||||||
educations?: Education[]
|
educations?: Education[]
|
||||||
nationalities?: Nationality[]
|
nationalities?: Nationality[]
|
||||||
|
fullName?: string
|
||||||
allNames?: string[]
|
allNames?: string[]
|
||||||
skills?: Partial<BasicEntity>[]
|
skills?: Partial<BasicEntity>[]
|
||||||
children?: BasicEntity[]
|
children?: BasicEntity[]
|
||||||
|
@ -401,6 +403,8 @@ export namespace diffbot {
|
||||||
parents?: BasicEntity[]
|
parents?: BasicEntity[]
|
||||||
gender?: Gender
|
gender?: Gender
|
||||||
importance?: number
|
importance?: number
|
||||||
|
monthlyTraffic?: number
|
||||||
|
monthlyTrafficGrowth?: number
|
||||||
wikipediaPageviews?: number
|
wikipediaPageviews?: number
|
||||||
wikipediaPageviewsLastQuarterGrowth?: number
|
wikipediaPageviewsLastQuarterGrowth?: number
|
||||||
wikipediaPageviewsLastYear?: number
|
wikipediaPageviewsLastYear?: number
|
||||||
|
@ -459,6 +463,9 @@ export namespace diffbot {
|
||||||
stock?: Stock
|
stock?: Stock
|
||||||
companiesHouseIds?: string[]
|
companiesHouseIds?: string[]
|
||||||
yearlyRevenues?: AnnualRevenue[]
|
yearlyRevenues?: AnnualRevenue[]
|
||||||
|
revenue?: Amount
|
||||||
|
parentCompany?: BasicEntity
|
||||||
|
legalEntities?: BasicEntity[]
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface AnnualRevenue {
|
export interface AnnualRevenue {
|
||||||
|
|
|
@ -1935,6 +1935,7 @@ export namespace proxycurl {
|
||||||
export type SearchResult = z.infer<typeof SearchResultSchema>
|
export type SearchResult = z.infer<typeof SearchResultSchema>
|
||||||
|
|
||||||
export const ResultProfileSchema = z.object({
|
export const ResultProfileSchema = z.object({
|
||||||
|
linkedin_url: z.string().optional(),
|
||||||
acquisitions: PurpleAcquisitionSchema.optional(),
|
acquisitions: PurpleAcquisitionSchema.optional(),
|
||||||
affiliated_companies: z.array(PurpleAffiliatedCompanySchema).optional(),
|
affiliated_companies: z.array(PurpleAffiliatedCompanySchema).optional(),
|
||||||
background_cover_image_url: z.string().optional(),
|
background_cover_image_url: z.string().optional(),
|
||||||
|
@ -1963,7 +1964,12 @@ export namespace proxycurl {
|
||||||
updates: z.array(PurpleCompanyUpdateSchema).optional(),
|
updates: z.array(PurpleCompanyUpdateSchema).optional(),
|
||||||
website: z.string().optional()
|
website: z.string().optional()
|
||||||
})
|
})
|
||||||
export type ResultProfile = z.infer<typeof ResultProfileSchema>
|
export type CompanyProfile = z.infer<typeof ResultProfileSchema>
|
||||||
|
export type ResolvedCompanyProfile = {
|
||||||
|
url: string
|
||||||
|
last_updated: string
|
||||||
|
profile: CompanyProfile
|
||||||
|
}
|
||||||
|
|
||||||
export const CompanyUrlEnrichResultProfileSchema = z.object({
|
export const CompanyUrlEnrichResultProfileSchema = z.object({
|
||||||
acquisitions: FluffyAcquisitionSchema.optional(),
|
acquisitions: FluffyAcquisitionSchema.optional(),
|
||||||
|
@ -2087,8 +2093,8 @@ export class ProxycurlClient extends AIFunctionsProvider {
|
||||||
})
|
})
|
||||||
async getLinkedInCompany(
|
async getLinkedInCompany(
|
||||||
opts: proxycurl.CompanyProfileEndpointParamsQueryClass
|
opts: proxycurl.CompanyProfileEndpointParamsQueryClass
|
||||||
) {
|
): Promise<proxycurl.CompanyProfile> {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('api/linkedin/company', {
|
.get('api/linkedin/company', {
|
||||||
searchParams: sanitizeSearchParams({
|
searchParams: sanitizeSearchParams({
|
||||||
funding_data: 'include',
|
funding_data: 'include',
|
||||||
|
@ -2097,7 +2103,12 @@ export class ProxycurlClient extends AIFunctionsProvider {
|
||||||
...opts
|
...opts
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.json<proxycurl.ResultProfile>()
|
.json<proxycurl.CompanyProfile>()
|
||||||
|
|
||||||
|
return {
|
||||||
|
linkedin_url: opts.url,
|
||||||
|
...res
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@aiFunction({
|
@aiFunction({
|
||||||
|
@ -2181,15 +2192,20 @@ export class ProxycurlClient extends AIFunctionsProvider {
|
||||||
})
|
})
|
||||||
async resolveLinkedInCompany(
|
async resolveLinkedInCompany(
|
||||||
opts: proxycurl.CompanyLookupEndpointParamsQueryClass
|
opts: proxycurl.CompanyLookupEndpointParamsQueryClass
|
||||||
) {
|
): Promise<proxycurl.CompanyProfile> {
|
||||||
return this.ky
|
const res = await this.ky
|
||||||
.get('api/linkedin/company/resolve', {
|
.get('api/linkedin/company/resolve', {
|
||||||
searchParams: sanitizeSearchParams({
|
searchParams: sanitizeSearchParams({
|
||||||
enrich_profile: 'enrich',
|
enrich_profile: 'enrich',
|
||||||
...opts
|
...opts
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.json<proxycurl.ResultProfile>()
|
.json<proxycurl.ResolvedCompanyProfile>()
|
||||||
|
|
||||||
|
return {
|
||||||
|
linkedin_url: res.url,
|
||||||
|
...res.profile
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@aiFunction({
|
@aiFunction({
|
||||||
|
|
|
@ -1,10 +1,18 @@
|
||||||
import defaultKy, { type KyInstance } from 'ky'
|
import defaultKy, { type KyInstance } from 'ky'
|
||||||
|
import pThrottle from 'p-throttle'
|
||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
|
|
||||||
import { aiFunction, AIFunctionsProvider } from '../fns.js'
|
import { aiFunction, AIFunctionsProvider } from '../fns.js'
|
||||||
import { assert, getEnv, omit } from '../utils.js'
|
import { assert, getEnv, omit, throttleKy } from '../utils.js'
|
||||||
|
|
||||||
export namespace scraper {
|
export namespace scraper {
|
||||||
|
// Allow up to 1 request per second by default.
|
||||||
|
export const throttle = pThrottle({
|
||||||
|
limit: 1,
|
||||||
|
interval: 1000,
|
||||||
|
strict: true
|
||||||
|
})
|
||||||
|
|
||||||
export type ScrapeResult = {
|
export type ScrapeResult = {
|
||||||
author: string
|
author: string
|
||||||
byline: string
|
byline: string
|
||||||
|
@ -47,10 +55,12 @@ export class ScraperClient extends AIFunctionsProvider {
|
||||||
|
|
||||||
constructor({
|
constructor({
|
||||||
apiBaseUrl = getEnv('SCRAPER_API_BASE_URL'),
|
apiBaseUrl = getEnv('SCRAPER_API_BASE_URL'),
|
||||||
|
throttle = true,
|
||||||
ky = defaultKy
|
ky = defaultKy
|
||||||
}: {
|
}: {
|
||||||
apiKey?: string
|
apiKey?: string
|
||||||
apiBaseUrl?: string
|
apiBaseUrl?: string
|
||||||
|
throttle?: boolean
|
||||||
ky?: KyInstance
|
ky?: KyInstance
|
||||||
} = {}) {
|
} = {}) {
|
||||||
assert(
|
assert(
|
||||||
|
@ -60,7 +70,9 @@ export class ScraperClient extends AIFunctionsProvider {
|
||||||
super()
|
super()
|
||||||
|
|
||||||
this.apiBaseUrl = apiBaseUrl
|
this.apiBaseUrl = apiBaseUrl
|
||||||
this.ky = ky.extend({ prefixUrl: this.apiBaseUrl })
|
|
||||||
|
const throttledKy = throttle ? throttleKy(ky, scraper.throttle) : ky
|
||||||
|
this.ky = throttledKy.extend({ prefixUrl: this.apiBaseUrl })
|
||||||
}
|
}
|
||||||
|
|
||||||
@aiFunction({
|
@aiFunction({
|
||||||
|
@ -99,6 +111,15 @@ export class ScraperClient extends AIFunctionsProvider {
|
||||||
})
|
})
|
||||||
.json<scraper.ScrapeResult>()
|
.json<scraper.ScrapeResult>()
|
||||||
|
|
||||||
|
if (res.length <= 40) {
|
||||||
|
try {
|
||||||
|
const message = (JSON.parse(res.textContent as string) as any).message
|
||||||
|
throw new Error(`Failed to scrape URL "${opts.url}": ${message}`)
|
||||||
|
} catch {
|
||||||
|
throw new Error(`Failed to scrape URL "${opts.url}"`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case 'html':
|
case 'html':
|
||||||
return omit(res, 'markdownContent', 'textContent', 'rawHtml')
|
return omit(res, 'markdownContent', 'textContent', 'rawHtml')
|
||||||
|
|
|
@ -67,6 +67,29 @@ export function pruneNullOrUndefined<T extends Record<string, any>>(
|
||||||
) as NonNullable<T>
|
) as NonNullable<T>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Recursively strips `null` and `undefined` from a JSON-like value.
 *
 * - Object properties whose value is `null` or `undefined` are dropped.
 * - Array elements that are `null` or `undefined` are dropped.
 * - Arrays are processed at every depth, including when the input itself is
 *   an array or when an array is nested directly inside another array.
 * - Other falsy values (`0`, `''`, `false`) are kept.
 * - Non-object inputs are returned unchanged.
 *
 * The input is never mutated; objects and arrays are rebuilt as new values.
 *
 * @param obj - The value to prune (typically a parsed JSON response).
 * @returns A copy of `obj` without any `null` / `undefined` entries.
 */
export function pruneNullOrUndefinedDeep<T extends Record<string, any>>(
  obj: T
): NonNullable<{ [K in keyof T]: Exclude<T[K], undefined | null> }> {
  const isNullish = (value: unknown): value is null | undefined =>
    value === undefined || value === null

  const prune = (value: unknown): unknown => {
    // Arrays are handled before the generic object branch so that array
    // inputs (top-level or nested) are pruned too rather than passed through.
    if (Array.isArray(value)) {
      return value
        .filter((item) => !isNullish(item))
        .map((item) => prune(item))
    }

    // Primitives (and nullish top-level input) are returned untouched.
    if (isNullish(value) || typeof value !== 'object') return value

    return Object.fromEntries(
      Object.entries(value)
        .filter(([, entry]) => !isNullish(entry))
        .map(([key, entry]) => [key, prune(entry)])
    )
  }

  return prune(obj) as NonNullable<{
    [K in keyof T]: Exclude<T[K], undefined | null>
  }>
}
|
||||||
|
|
||||||
export function getEnv(name: string): string | undefined {
|
export function getEnv(name: string): string | undefined {
|
||||||
try {
|
try {
|
||||||
return typeof process !== 'undefined'
|
return typeof process !== 'undefined'
|
||||||
|
|
Ładowanie…
Reference in New Issue