pull/643/head^2
Travis Fischer 2024-06-03 03:46:05 -05:00
rodzic 3caa185fbe
commit 5863f31f8b
4 zmienionych plików z 488 dodań i 129 usunięć

2
.gitignore vendored
Wyświetl plik

@ -37,3 +37,5 @@ yarn-error.log*
next-env.d.ts next-env.d.ts
.env .env
old/

Wyświetl plik

@ -9,7 +9,8 @@ import restoreCursor from 'restore-cursor'
// import { WikipediaClient } from '../src/index.js' // import { WikipediaClient } from '../src/index.js'
// import { PerigonClient } from '../src/index.js' // import { PerigonClient } from '../src/index.js'
// import { FirecrawlClient } from '../src/index.js' // import { FirecrawlClient } from '../src/index.js'
import { ExaClient } from '../src/index.js' // import { ExaClient } from '../src/index.js'
import { DiffbotClient } from '../src/index.js'
/** /**
* Scratch pad for testing. * Scratch pad for testing.
@ -57,12 +58,20 @@ async function main() {
// }) // })
// console.log(JSON.stringify(res, null, 2)) // console.log(JSON.stringify(res, null, 2))
const exa = new ExaClient() // const exa = new ExaClient()
const res = await exa.search({ // const res = await exa.search({
query: 'OpenAI', // query: 'OpenAI',
contents: { // contents: { text: true }
text: true // })
} // console.log(JSON.stringify(res, null, 2))
const diffbot = new DiffbotClient()
// const res = await diffbot.analyzeUrl({
// url: 'https://www.bbc.com/news/articles/cp4475gwny1o'
// })
const res = await diffbot.enhanceEntity({
type: 'Person',
name: 'Travis Fischer'
}) })
console.log(JSON.stringify(res, null, 2)) console.log(JSON.stringify(res, null, 2))
} }

Wyświetl plik

@ -40,7 +40,7 @@ export abstract class AIFunctionsProvider {
} }
export function aiFunction< export function aiFunction<
This, This extends AIFunctionsProvider,
InputSchema extends z.SomeZodObject, InputSchema extends z.SomeZodObject,
OptionalArgs extends Array<undefined>, OptionalArgs extends Array<undefined>,
Return extends types.MaybePromise<any> Return extends types.MaybePromise<any>

Wyświetl plik

@ -1,6 +1,8 @@
import defaultKy, { type KyInstance } from 'ky' import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle' import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, throttleKy } from '../utils.js' import { assert, getEnv, throttleKy } from '../utils.js'
export namespace diffbot { export namespace diffbot {
@ -8,13 +10,14 @@ export namespace diffbot {
export const KNOWLEDGE_GRAPH_API_BASE_URL = 'https://kg.diffbot.com' export const KNOWLEDGE_GRAPH_API_BASE_URL = 'https://kg.diffbot.com'
// Allow up to 5 requests per second by default. // Allow up to 5 requests per second by default.
// https://docs.diffbot.com/reference/rate-limits
export const throttle = pThrottle({ export const throttle = pThrottle({
limit: 5, limit: 5,
interval: 1000, interval: 1000,
strict: true strict: true
}) })
export interface DiffbotExtractOptions { export interface ExtractOptions {
/** Specify optional fields to be returned from any fully-extracted pages, e.g.: &fields=querystring,links. See available fields within each API's individual documentation pages. /** Specify optional fields to be returned from any fully-extracted pages, e.g.: &fields=querystring,links. See available fields within each API's individual documentation pages.
* @see https://docs.diffbot.com/reference/extract-optional-fields * @see https://docs.diffbot.com/reference/extract-optional-fields
*/ */
@ -45,8 +48,8 @@ export namespace diffbot {
customHeaders?: Record<string, string> customHeaders?: Record<string, string>
} }
export interface DiffbotExtractAnalyzeOptions extends DiffbotExtractOptions { export interface ExtractAnalyzeOptions extends ExtractOptions {
/** Web page URL of the analyze to process */ /** URL of the web page to process */
url: string url: string
/** By default the Analyze API will fully extract all pages that match an existing Automatic API -- articles, products or image pages. Set mode to a specific page-type (e.g., mode=article) to extract content only from that specific page-type. All other pages will simply return the default Analyze fields. */ /** By default the Analyze API will fully extract all pages that match an existing Automatic API -- articles, products or image pages. Set mode to a specific page-type (e.g., mode=article) to extract content only from that specific page-type. All other pages will simply return the default Analyze fields. */
@ -56,8 +59,8 @@ export namespace diffbot {
fallback?: string fallback?: string
} }
export interface DiffbotExtractArticleOptions extends DiffbotExtractOptions { export interface ExtractArticleOptions extends ExtractOptions {
/** Web page URL of the analyze to process */ /** URL of the web page to process */
url: string url: string
/** Set the maximum number of automatically-generated tags to return. By default a maximum of ten tags will be returned. */ /** Set the maximum number of automatically-generated tags to return. By default a maximum of ten tags will be returned. */
@ -70,15 +73,14 @@ export namespace diffbot {
naturalLanguage?: string[] naturalLanguage?: string[]
} }
export interface DiffbotExtractResponse { export interface ExtractResponse {
request: DiffbotRequest request: DiffbotRequest
objects: DiffbotObject[] objects: DiffbotObject[]
} }
export type DiffbotExtractArticleResponse = DiffbotExtractResponse export type ExtractArticleResponse = ExtractResponse
export interface DiffbotExtractAnalyzeResponse export interface ExtractAnalyzeResponse extends ExtractResponse {
extends DiffbotExtractResponse {
type: string type: string
title: string title: string
humanLanguage: string humanLanguage: string
@ -87,7 +89,7 @@ export namespace diffbot {
export interface DiffbotObject { export interface DiffbotObject {
date: string date: string
sentiment: number sentiment: number
images: DiffbotImage[] images: Image[]
author: string author: string
estimatedDate: string estimatedDate: string
publisherRegion: string publisherRegion: string
@ -96,44 +98,44 @@ export namespace diffbot {
siteName: string siteName: string
type: string type: string
title: string title: string
tags: DiffbotTag[] tags: Tag[]
publisherCountry: string publisherCountry: string
humanLanguage: string humanLanguage: string
authorUrl: string authorUrl: string
pageUrl: string pageUrl: string
html: string html: string
text: string text: string
categories?: DiffbotCategory[] categories?: ObjectCategory[]
authors: DiffbotAuthor[] authors: Author[]
breadcrumb?: DiffbotBreadcrumb[] breadcrumb?: Breadcrumb[]
items?: DiffbotListItem[] items?: ListItem[]
meta?: any meta?: any
} }
interface DiffbotListItem { export interface ListItem {
title: string title: string
link: string link: string
summary: string summary: string
image?: string image?: string
} }
interface DiffbotAuthor { export interface Author {
name: string name: string
link: string link: string
} }
interface DiffbotCategory { export interface ObjectCategory {
score: number score: number
name: string name: string
id: string id: string
} }
export interface DiffbotBreadcrumb { export interface Breadcrumb {
link: string link: string
name: string name: string
} }
interface DiffbotImage { export interface Image {
url: string url: string
diffbotUri: string diffbotUri: string
@ -146,29 +148,6 @@ export namespace diffbot {
primary?: boolean primary?: boolean
} }
interface DiffbotTag {
score: number
sentiment: number
count: number
label: string
uri: string
rdfTypes: string[]
}
interface DiffbotRequest {
pageUrl: string
api: string
version: number
}
export interface Image {
naturalHeight: number
diffbotUri: string
url: string
naturalWidth: number
primary: boolean
}
export interface Tag { export interface Tag {
score: number score: number
sentiment: number sentiment: number
@ -178,13 +157,13 @@ export namespace diffbot {
rdfTypes: string[] rdfTypes: string[]
} }
export interface Request { export interface DiffbotRequest {
pageUrl: string pageUrl: string
api: string api: string
version: number version: number
} }
export interface DiffbotKnowledgeGraphSearchOptions { export interface KnowledgeGraphSearchOptions {
type?: 'query' | 'text' | 'queryTextFallback' | 'crawl' type?: 'query' | 'text' | 'queryTextFallback' | 'crawl'
query: string query: string
col?: string col?: string
@ -206,8 +185,8 @@ export namespace diffbot {
report?: boolean report?: boolean
} }
export interface DiffbotKnowledgeGraphEnhanceOptions { export interface KnowledgeGraphEnhanceOptions {
type: 'Person' | 'Organization' type: EntityType
id?: string id?: string
name?: string name?: string
@ -233,8 +212,8 @@ export namespace diffbot {
nonCanonicalFacts?: boolean nonCanonicalFacts?: boolean
} }
export interface DiffbotKnowledgeGraphResponse { export interface KnowledgeGraphResponse {
data: DiffbotKnowledgeGraphNode[] data: KnowledgeGraphNode[]
version: number version: number
hits: number hits: number
results: number results: number
@ -244,10 +223,10 @@ export namespace diffbot {
errors?: any[] errors?: any[]
} }
export interface DiffbotKnowledgeGraphNode { export interface KnowledgeGraphNode {
score: number score: number
esscore?: number esscore?: number
entity: DiffbotKnowledgeGraphEntity entity: KnowledgeGraphEntity
entity_ctx: any entity_ctx: any
errors: string[] errors: string[]
callbackQuery: string callbackQuery: string
@ -258,77 +237,406 @@ export namespace diffbot {
uri: string uri: string
} }
export interface DiffbotKnowledgeGraphEntity { export interface KnowledgeGraphEntity {
id: string id: string
diffbotUri: string diffbotUri: string
type?: string type?: string
name: string name: string
images: DiffbotImage[] images: Image[]
origins: string[] origins: string[]
nbOrigins?: number nbOrigins?: number
gender?: DiffbotGender gender?: Gender
githubUri?: string githubUri?: string
importance?: number importance?: number
description?: string description?: string
homepageUri?: string homepageUri?: string
allNames?: string[] allNames?: string[]
skills?: DiffbotSkill[] skills?: Skill[]
crawlTimestamp?: number crawlTimestamp?: number
summary?: string summary?: string
image?: string image?: string
types?: string[] types?: string[]
nbIncomingEdges?: number nbIncomingEdges?: number
allUris?: string[] allUris?: string[]
employments?: DiffbotEmployment[] employments?: Employment[]
locations?: DiffbotLocation[] locations?: Location[]
location?: DiffbotLocation location?: Location
allOriginHashes?: string[] allOriginHashes?: string[]
nameDetail?: DiffbotNameDetail nameDetail?: NameDetail
} }
interface DiffbotEmployment { export type EntityType = 'Organization' | 'Place'
employer: Entity
export const EnhanceEntityOptionsSchema = z.object({
type: z.enum(['Person', 'Organization']),
id: z
.string()
.optional()
.describe('Diffbot ID of the entity to enhance if known'),
name: z
.union([z.string(), z.array(z.string())])
.optional()
.describe('Name of the entity'),
url: z
.array(z.string())
.optional()
.describe('Origin or homepage URL of the entity'),
phone: z.string().optional().describe('Phone number of the entity'),
email: z.string().optional().describe('Email of the entity'),
employer: z
.string()
.optional()
.describe("Name of the entity's employer (for Person entities)"),
title: z
.string()
.optional()
.describe('Title of the entity (for Person entities)'),
school: z
.string()
.optional()
.describe('School of the entity (for Person entities)'),
location: z.string().optional().describe('Location of the entity'),
ip: z.string().optional().describe('IP address of the entity'),
customId: z.string().optional().describe('User-defined ID for correlation'),
threshold: z.number().optional().describe('Similarity threshold'),
refresh: z
.boolean()
.optional()
.describe(
'If set, will attempt to refresh the entity data by recrawling the source URLs.'
),
search: z
.boolean()
.optional()
.describe(
'If set, will attempt to search the web for the entity and merge the results into its knowledge base.'
),
size: z
.number()
.int()
.positive()
.max(100)
.optional()
.describe('Number of results to return')
})
export type EnhanceEntityOptions = z.infer<typeof EnhanceEntityOptionsSchema>
export interface EnhanceEntityResponse {
version: number
hits: number
kgversion: string
request_ctx: RequestCtx
data: EnhanceEntityResponseDatum[]
errors: any[]
} }
interface Entity { export interface RequestCtx {
query: Query
query_ctx: QueryCtx
}
export interface Query {
type: string
name: string[]
}
export interface QueryCtx {
search: string
}
export interface EnhanceEntityResponseDatum {
score: number
esscore: number
entity: Entity
errors: any[]
}
export interface Entity {
name: string
type: EntityType
id: string
summary?: string
description?: string
homepageUri?: string
twitterUri?: string
linkedInUri?: string
githubUri?: string
crunchbaseUri?: string
googlePlusUri?: string
diffbotUri?: string
educations?: Education[]
nationalities?: Nationality[]
allNames?: string[]
skills?: Skill[]
children?: Children[]
nbOrigins?: number
height?: number
image?: string
images?: Image[]
nbIncomingEdges?: number
nbFollowers?: number
allOriginHashes?: string[]
nameDetail?: NameDetail
parents?: Parent[]
gender?: Gender
importance?: number
origin?: string
wikipediaUri: string
wikipediaPageviewsLastQuarterGrowth?: number
wikipediaPageviewsLastYear?: number
wikipediaPageviewsLastYearGrowth?: number
wikipediaPageviews?: number
wikipediaPageviewsLastQuarter?: number
wikipediaPageviewsGrowth?: number
birthPlace?: BirthPlace
origins: string[]
crawlTimestamp: number
types?: string[]
unions?: Union[]
languages?: Language[]
allUris?: string[]
employments?: Employment[]
birthDate?: DateTime
religion?: Religion
awards?: Award[]
netWorth?: NetWorth
allDescriptions?: string[]
locations?: Location[]
location?: Location
interests?: Interest[]
age?: number
}
export interface Education {
institution: Institution
isCurrent?: boolean
major?: Major
degree?: Degree
from?: DateTime
to?: DateTime
}
export interface Institution {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Major {}
export interface Degree {
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface DateTime {
str: string
precision: number
timestamp: number
}
export interface Nationality {
name: string
type: string
}
export interface Skill {
name: string
diffbotUri?: string
targetDiffbotId?: string
}
export interface Children {
summary: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Image {
url: string
primary?: boolean
}
export interface NameDetail {
firstName: string
lastName: string
middleName?: string[]
}
export interface Parent {
summary: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
image?: string
}
export interface Gender {
normalizedValue: string
}
export interface BirthPlace {
country: Country
isCurrent: boolean
address: string
city: City
subregion: Subregion
latitude: number
precision: number
surfaceForm: string
region: Region
longitude: number
}
export interface Country {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface City {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Subregion {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Region {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Union {
person: Person
from?: DateTime
to?: DateTime
type?: string
}
export interface Person {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Language {
str: string
normalizedValue: string
}
export interface Employment {
isCurrent?: boolean
employer?: Employer
from?: DateTime
categories?: EmploymentCategory[]
title?: string
to?: DateTime
location?: Location
}
export interface Employer {
summary?: string
image?: string image?: string
types?: string[] types?: string[]
name: string name: string
diffbotUri?: string diffbotUri?: string
type: EntityType targetDiffbotId?: string
summary?: string type: string
} }
type EntityType = 'Organization' | 'Place' export interface EmploymentCategory {
types: string[]
interface DiffbotGender { name: string
normalizedValue: string diffbotUri: string
targetDiffbotId: string
type: string
} }
interface DiffbotLocation { export interface Location {
country: Entity country?: Country
isCurrent: boolean isCurrent: boolean
address: string address: string
city: City
street: string
metroArea: MetroArea
subregion: Subregion
latitude: number latitude: number
precision: number precision: number
surfaceForm: string postalCode: string
region: Entity region?: Region
longitude: number longitude: number
} }
interface DiffbotNameDetail { export interface MetroArea {
firstName: string summary: string
lastName: string image: string
} types: string[]
interface DiffbotSkill {
name: string name: string
diffbotUri: string diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Religion {
str: string
}
export interface Award {
title: string
date?: DateTime
}
export interface NetWorth {
currency: string
value: number
}
export interface Interest {
name: string
type: string
} }
} }
export class DiffbotClient { export class DiffbotClient extends AIFunctionsProvider {
readonly ky: KyInstance readonly ky: KyInstance
readonly kyKnowledgeGraph: KyInstance readonly kyKnowledgeGraph: KyInstance
@ -355,6 +663,7 @@ export class DiffbotClient {
apiKey, apiKey,
`DiffbotClient missing required "apiKey" (defaults to "DIFFBOT_API_KEY")` `DiffbotClient missing required "apiKey" (defaults to "DIFFBOT_API_KEY")`
) )
super()
this.apiKey = apiKey this.apiKey = apiKey
this.apiBaseUrl = apiBaseUrl this.apiBaseUrl = apiBaseUrl
@ -373,9 +682,88 @@ export class DiffbotClient {
}) })
} }
@aiFunction({
name: 'diffbot_analyze_url',
description:
'Scrapes and extracts structured data from a web page. Also classifies the web page as one of several types (article, product, discussion, job, image, video, list, event, or other).',
inputSchema: z.object({
url: z.string().url().describe('The URL to process.')
})
})
async analyzeUrl(options: diffbot.ExtractAnalyzeOptions) {
return this._extract<diffbot.ExtractAnalyzeResponse>('v3/analyze', options)
}
@aiFunction({
name: 'diffbot_extract_article_from_url',
description:
'Scrapes and extracts clean article text from news articles, blog posts, and other text-heavy web pages.',
inputSchema: z.object({
url: z.string().url().describe('The URL to process.')
})
})
async extractArticleFromUrl(options: diffbot.ExtractArticleOptions) {
return this._extract<diffbot.ExtractArticleResponse>('v3/article', options)
}
@aiFunction({
name: 'diffbot_enhance_entity',
description:
'Enriches a person or organization entity given partial data. Enhance is an enrichment API to find a person or organization using partial data as input. Enhance scores several candidates against the submitted query and returns the best match. More information in the query helps Enhance models estimate with more confidence and will typically result in better matches and a higher score for the matches.',
inputSchema: diffbot.EnhanceEntityOptionsSchema.omit({
refresh: true,
search: true,
customId: true,
threshold: true
})
})
async enhanceEntity(opts: diffbot.EnhanceEntityOptions) {
const { name, url, ...params } = opts
// TODO: clean this array handling up...
const arraySearchParams = [
name ? (Array.isArray(name) ? name : [name]).map((v) => ['name', v]) : [],
url?.map((v) => ['url', v])
]
.filter(Boolean)
.flat()
return this.kyKnowledgeGraph
.get('kg/v3/enhance', {
searchParams: new URLSearchParams([
...arraySearchParams,
...Object.entries(params).map(([key, value]) => [key, String(value)]),
['token', this.apiKey]
])
})
.json<diffbot.EnhanceEntityResponse>()
}
async searchKnowledgeGraph(options: diffbot.KnowledgeGraphSearchOptions) {
return this.kyKnowledgeGraph
.get('kg/v3/dql', {
searchParams: {
...options,
token: this.apiKey
}
})
.json<diffbot.KnowledgeGraphResponse>()
}
async enhanceKnowledgeGraph(options: diffbot.KnowledgeGraphEnhanceOptions) {
return this.kyKnowledgeGraph
.get('kg/v3/enhance', {
searchParams: {
...options,
token: this.apiKey
}
})
.json<diffbot.KnowledgeGraphResponse>()
}
protected async _extract< protected async _extract<
T extends diffbot.DiffbotExtractResponse = diffbot.DiffbotExtractResponse T extends diffbot.ExtractResponse = diffbot.ExtractResponse
>(endpoint: string, options: diffbot.DiffbotExtractOptions): Promise<T> { >(endpoint: string, options: diffbot.ExtractOptions): Promise<T> {
const { customJs, customHeaders, ...rest } = options const { customJs, customHeaders, ...rest } = options
const searchParams: Record<string, any> = { const searchParams: Record<string, any> = {
...rest, ...rest,
@ -404,44 +792,4 @@ export class DiffbotClient {
}) })
.json<T>() .json<T>()
} }
async extractAnalyze(options: diffbot.DiffbotExtractAnalyzeOptions) {
return this._extract<diffbot.DiffbotExtractAnalyzeResponse>(
'v3/analyze',
options
)
}
async extractArticle(options: diffbot.DiffbotExtractArticleOptions) {
return this._extract<diffbot.DiffbotExtractArticleResponse>(
'v3/article',
options
)
}
async knowledgeGraphSearch(
options: diffbot.DiffbotKnowledgeGraphSearchOptions
) {
return this.kyKnowledgeGraph
.get('kg/v3/dql', {
searchParams: {
...options,
token: this.apiKey
}
})
.json<diffbot.DiffbotKnowledgeGraphResponse>()
}
async knowledgeGraphEnhance(
options: diffbot.DiffbotKnowledgeGraphEnhanceOptions
) {
return this.kyKnowledgeGraph
.get('kg/v3/enhance', {
searchParams: {
...options,
token: this.apiKey
}
})
.json<diffbot.DiffbotKnowledgeGraphResponse>()
}
} }