pull/643/head^2
Travis Fischer 2024-06-03 02:09:12 -05:00
rodzic ca9254d567
commit 7731ca09bc
2 zmienionych plików z 155 dodań i 179 usunięć

Wyświetl plik

@ -1,135 +1,160 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv } from '../utils.js'
export namespace exa {
/**
 * Options common to the search-style requests declared in this namespace.
 *
 * Every field is optional.
 */
export type BaseSearchOptions = {
  /** Number of search results to return. Default 10. Max 10 for basic plans. */
  numResults?: number

  /** Domains to include in the search. */
  includeDomains?: string[]

  /** Domains to exclude in the search. */
  excludeDomains?: string[]

  /** Start date for results, based on crawl date. */
  startCrawlDate?: string

  /** End date for results, based on crawl date. */
  endCrawlDate?: string

  /** Start date for results, based on published date. */
  startPublishedDate?: string

  /** End date for results, based on published date. */
  endPublishedDate?: string

  /**
   * A data category to focus on, with higher comprehensivity and data
   * cleanliness. Currently, the only category is company.
   */
  category?: string
}
/**
 * Zod schema describing how page text should be returned.
 * Both fields are optional.
 */
export const TextContentsOptionsSchema = z.object({
  maxCharacters: z.number().optional().describe(
    'The maximum number of characters to return.'
  ),
  includeHtmlTags: z.boolean().optional().describe(
    'If true, includes HTML tags in the returned text.'
  )
})

/** Static type inferred from {@link TextContentsOptionsSchema}. */
export type TextContentsOptions = z.infer<typeof TextContentsOptionsSchema>
/**
 * Options for a query-based search: everything in `BaseSearchOptions` plus
 * the two query-specific switches below.
 */
export type RegularSearchOptions = BaseSearchOptions & {
  /** If true, converts query to a Metaphor query. */
  useAutoprompt?: boolean

  /** Type of search, 'keyword' or 'neural'. */
  type?: string
}
/**
 * Options for finding links similar to a given one: everything in
 * `BaseSearchOptions` plus `excludeSourceDomain`.
 */
export type FindSimilarOptions = BaseSearchOptions & {
  /** If true, excludes links from the base domain of the input. */
  excludeSourceDomain?: boolean
}
/**
 * Options selecting which page contents (text and/or highlights) to
 * retrieve. Each field takes either a detailed options object or `true`.
 */
export type ContentsOptions = {
  /** Options for retrieving text contents. */
  text?: TextContentsOptions | true

  /** Options for retrieving highlights. */
  highlights?: HighlightsContentsOptions | true
}
/**
 * Options controlling how text is retrieved from a page.
 */
export type TextContentsOptions = {
  /** The maximum number of characters to return. */
  maxCharacters?: number

  /** If true, includes HTML tags in the returned text. Default: false */
  includeHtmlTags?: boolean
}
/**
 * Options controlling how highlights are retrieved from a page.
 */
export type HighlightsContentsOptions = {
  /** The query string to use for highlights search. */
  query?: string

  /** The number of sentences to return for each highlight. */
  numSentences?: number

  /** The number of highlights to return for each URL. */
  highlightsPerUrl?: number
}
/** Result component carrying the text of a page. */
export type TextResponse = {
  /** Text from page */
  text: string
}
/** Result component carrying highlights and their scores. */
export type HighlightsResponse = {
  /** The highlights as an array of strings. */
  highlights: string[]

  /** The corresponding scores as an array of floats, 0 to 1 */
  highlightScores: number[]
}
/**
 * Resolves to `U` when `T` has no keys (`keyof T` is `never`), and to `T`
 * otherwise. The tuple wrapping keeps the conditional from distributing.
 */
export type Default<T extends {}, U> =
  [keyof T] extends [never] ? U : T
/**
* Depending on 'ContentsOptions', this yields either a 'TextResponse',
* a 'HighlightsResponse', both, or an empty object.
*/
export type ContentsResultComponent<T extends ContentsOptions> = Default<
(T['text'] extends object | true ? TextResponse : {}) &
(T['highlights'] extends object | true ? HighlightsResponse : {}),
TextResponse
/**
 * Zod schema describing how highlights should be returned.
 * All three fields are optional.
 */
export const HighlightsContentsOptionsSchema = z.object({
  query: z.string().optional().describe(
    'The query string to use for highlights search.'
  ),
  numSentences: z.number().optional().describe(
    'The number of sentences to return for each highlight.'
  ),
  highlightsPerUrl: z.number().optional().describe(
    'The number of highlights to return for each URL.'
  )
})

/** Static type inferred from {@link HighlightsContentsOptionsSchema}. */
export type HighlightsContentsOptions = z.infer<
  typeof HighlightsContentsOptionsSchema
>
/**
 * Zod schema selecting which page contents to retrieve.
 *
 * Each field is optional and accepts either a detailed options object or the
 * literal `true`. Both fields carry a description so that they are documented
 * in any schema generated from this object, like the other option fields.
 */
export const ContentsOptionsSchema = z.object({
  text: z
    .union([TextContentsOptionsSchema, z.literal(true)])
    .optional()
    .describe('Options for retrieving text contents.'),
  highlights: z
    .union([HighlightsContentsOptionsSchema, z.literal(true)])
    .optional()
    .describe('Options for retrieving highlights.')
})

/** Static type inferred from {@link ContentsOptionsSchema}. */
export type ContentsOptions = z.infer<typeof ContentsOptionsSchema>
/**
 * Zod schema for the options common to search-style requests.
 * Every field is optional.
 */
export const BaseSearchOptionsSchema = z.object({
  numResults: z.number().optional().describe(
    'Number of search results to return.'
  ),
  includeDomains: z.array(z.string()).optional().describe(
    'List of domains to include in the search.'
  ),
  excludeDomains: z.array(z.string()).optional().describe(
    'List of domains to exclude from the search.'
  ),
  startCrawlDate: z.string().optional().describe(
    'Start date for results based on crawl date.'
  ),
  endCrawlDate: z.string().optional().describe(
    'End date for results based on crawl date.'
  ),
  startPublishedDate: z.string().optional().describe(
    'Start date for results based on published date.'
  ),
  endPublishedDate: z.string().optional().describe(
    'End date for results based on published date.'
  ),
  category: z.string().optional().describe(
    'A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company.'
  ),
  contents: ContentsOptionsSchema.optional().describe(
    'Whether to include the contents of the search results.'
  )
})

/** Static type inferred from {@link BaseSearchOptionsSchema}. */
export type BaseSearchOptions = z.infer<typeof BaseSearchOptionsSchema>
/**
 * Zod schema for a query-based search: `BaseSearchOptionsSchema` extended
 * with a required `query` and two optional switches.
 *
 * Every field carries a description so that the schema is fully documented
 * when used as a function input schema.
 */
export const RegularSearchOptionsSchema = BaseSearchOptionsSchema.extend({
  query: z.string().describe('search query'),
  useAutoprompt: z
    .boolean()
    .optional()
    .describe('If true, converts the query to an Exa prompt-engineered query.'),
  type: z
    .enum(['keyword', 'neural', 'magic'])
    .optional()
    .describe("Type of search: 'keyword', 'neural', or 'magic'.")
})

/** Static type inferred from {@link RegularSearchOptionsSchema}. */
export type RegularSearchOptions = z.infer<typeof RegularSearchOptionsSchema>
/**
 * Zod schema for a find-similar request: `BaseSearchOptionsSchema` extended
 * with a required `url` and an optional `excludeSourceDomain` flag.
 */
export const FindSimilarOptionsSchema = BaseSearchOptionsSchema.extend({
  url: z.string().describe(
    'The url for which you would like to find similar links'
  ),
  excludeSourceDomain: z.boolean().optional().describe(
    'If true, excludes links from the base domain of the input.'
  )
})

/** Static type inferred from {@link FindSimilarOptionsSchema}. */
export type FindSimilarOptions = z.infer<typeof FindSimilarOptionsSchema>
/**
 * Zod schema for a get-contents request: `ContentsOptionsSchema` extended
 * with a required, non-empty array of document IDs.
 */
export const GetContentsOptionsSchema = ContentsOptionsSchema.extend({
  ids: z.array(z.string()).nonempty().describe(
    'Exa IDs of the documents to retrieve.'
  )
})

/** Static type inferred from {@link GetContentsOptionsSchema}. */
export type GetContentsOptions = z.infer<typeof GetContentsOptionsSchema>
/**
* Represents a single search result object.
*
* `title` may be `null`; `url` and `id` are always present. `publishedDate`,
* `author` and `score` are optional, as are the content fields `text`,
* `highlights` and `highlightScores` declared at the end.
*/
// NOTE(review): two declaration headers for `SearchResult` appear back to back
// below (a generic one and a plain one), and there are two closers further
// down — looks like two revisions interleaved; confirm which is current.
export type SearchResult<T extends ContentsOptions = ContentsOptions> = {
export type SearchResult = {
/** The title of the search result. */
title: string | null
/** The URL of the search result. */
url: string
/** The estimated creation date of the content. */
publishedDate?: string
/** The author of the content, if available. */
author?: string
/** Similarity score between the query/url and the result. */
score?: number
/** The temporary ID for the document. */
/** The temporary Exa ID for the document. */
id: string
} & ContentsResultComponent<T>
/** Text from the page, when present. */
text?: string
/** The highlights as an array of strings, when present. */
highlights?: string[]
/** The scores corresponding to `highlights`, as an array of floats, 0 to 1. */
highlightScores?: number[]
}
/**
* Represents a search response object: a list of results plus an optional
* autoprompt string.
*/
// NOTE(review): two declaration headers and two `results` fields appear below
// (generic and non-generic variants) — looks like two revisions interleaved;
// confirm which is current.
export type SearchResponse<T extends ContentsOptions = ContentsOptions> = {
export type SearchResponse = {
/** The list of search results. */
results: SearchResult<T>[]
results: SearchResult[]
/** The autoprompt string, if applicable. */
autopromptString?: string
}
}
export class ExaClient {
export class ExaClient extends AIFunctionsProvider {
readonly apiKey: string
readonly apiBaseUrl: string
readonly ky: KyInstance
@ -147,6 +172,7 @@ export class ExaClient {
apiKey,
'ExaClient missing required "apiKey" (defaults to "EXA_API_KEY")'
)
super()
this.apiKey = apiKey
this.apiBaseUrl = apiBaseUrl
@ -162,103 +188,52 @@ export class ExaClient {
/**
* Performs an Exa search for the given query.
*/
// NOTE(review): two `search` declarations appear below (this one issues a GET,
// the decorated one a POST), with `searchAndContents` sitting between the
// second declaration's opening lines and its final `return` — looks like two
// revisions interleaved; confirm which is current.
async search(opts: { query: string } & exa.RegularSearchOptions) {
return this.ky.get('search', { json: opts }).json<exa.SearchResponse>()
}
// Decorated with `aiFunction` metadata: name `exa_search`, input schema
// `exa.RegularSearchOptionsSchema`.
@aiFunction({
name: 'exa_search',
description: 'Search the web for the given query.',
inputSchema: exa.RegularSearchOptionsSchema
})
async search(queryOrOpts: string | exa.RegularSearchOptions) {
// A bare string is wrapped as `{ query }`; an options object is used as-is.
const json =
typeof queryOrOpts === 'string' ? { query: queryOrOpts } : queryOrOpts
/**
* Performs a search with an Exa prompt-engineered query and returns the
* contents of the documents.
*
* `text` and `highlights` are pulled out of the arguments and sent nested
* under `contents`; every other option is forwarded at the top level of the
* request body, which is POSTed to `search`.
*/
async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
query,
text,
highlights,
...rest
}: { query: string } & exa.RegularSearchOptions & T) {
return this.ky
.post('search', {
json: {
query,
contents:
// With neither `text` nor `highlights` given, request `{ text: true }`;
// otherwise forward only the option(s) that were provided.
!text && !highlights
? { text: true }
: {
...(text ? { text } : {}),
...(highlights ? { highlights } : {})
},
...rest
}
})
.json<exa.SearchResponse<T>>()
// POSTs `json` to `search` and parses the response body as JSON.
return this.ky.post('search', { json }).json<exa.SearchResponse>()
}
/**
* Finds similar links to the provided URL.
*
* POSTs `opts` unchanged as the JSON body to `findSimilar` and parses the
* response body as `exa.SearchResponse`.
*/
// NOTE(review): two signatures for `findSimilar` appear below sharing a single
// body — looks like two revisions interleaved; confirm which is current.
async findSimilar(opts: { url: string } & exa.FindSimilarOptions) {
// Decorated with `aiFunction` metadata: name `exa_find_similar`, input schema
// `exa.FindSimilarOptionsSchema`.
@aiFunction({
name: 'exa_find_similar',
description: 'Find similar links to the provided URL.',
inputSchema: exa.FindSimilarOptionsSchema
})
async findSimilar(opts: exa.FindSimilarOptions) {
return this.ky
.post('findSimilar', { json: opts })
.json<exa.SearchResponse>()
}
/**
* Finds similar links to the provided URL and returns the contents of the
* documents.
* Retrieves contents of documents based on a list of Exa document IDs.
*/
// NOTE(review): this span holds `findSimilarAndContents` followed by two
// declarations of `getContents` whose lines are interleaved (e.g. two `ids:`
// entries and two `.json<…>()` calls in the final request) — looks like two
// revisions merged together; confirm which is current.
//
// `findSimilarAndContents` pulls `text` and `highlights` out of the arguments,
// sends them nested under `contents`, and forwards every other option at the
// top level of the body POSTed to `findSimilar`.
async findSimilarAndContents<
T extends exa.ContentsOptions = exa.ContentsOptions
>({
url,
text,
highlights,
...rest
}: { url: string } & exa.FindSimilarOptions & T) {
return this.ky
.post('findSimilar', {
json: {
url,
contents:
// With neither `text` nor `highlights` given, request `{ text: true }`;
// otherwise forward only the option(s) that were provided.
!text && !highlights
? { text: true }
: {
...(text ? { text } : {}),
...(highlights ? { highlights } : {})
},
...rest
}
})
.json<exa.SearchResponse<T>>()
}
/**
* Retrieves contents of documents based on a list of document IDs.
*
* `ids` may be a single ID string, an array of ID strings, or an array of
* search results (whose `id` fields are used).
*/
async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
ids,
...opts
}: { ids: string | string[] | exa.SearchResult[] } & T) {
// Normalize the accepted `ids` shapes into a plain string array. An array is
// classified as strings vs. results by inspecting its first element only.
let requestIds: string[]
if (typeof ids === 'string') {
requestIds = [ids]
} else if (typeof ids[0] === 'string') {
requestIds = ids as string[]
} else {
requestIds = (ids as exa.SearchResult[]).map((result) => result.id)
}
// Checked against the original `ids`, so an empty string is rejected as well
// as an empty array.
if (ids.length === 0) {
throw new Error('Must provide at least one ID')
}
// Decorated with `aiFunction` metadata: name `exa_get_contents`, input schema
// `exa.GetContentsOptionsSchema`.
@aiFunction({
name: 'exa_get_contents',
description:
'Retrieve contents of documents based on a list of Exa document IDs.',
inputSchema: exa.GetContentsOptionsSchema
})
async getContents({ ids, ...opts }: exa.GetContentsOptions) {
// Tolerates a non-array `ids` by wrapping it in a one-element array.
const documentIDs = Array.isArray(ids) ? ids : [ids]
// Presumably throws with this message when the list is empty — `assert` comes
// from '../utils.js'; confirm its behavior there.
assert(documentIDs.length, 'Must provide at least one document ID')
// POSTs the remaining options together with the IDs to `contents`.
return this.ky
.post('contents', {
json: {
...opts,
ids: requestIds
ids: documentIDs
}
})
.json<exa.SearchResponse<T>>()
.json<exa.SearchResponse>()
}
}

Wyświetl plik

@ -5,7 +5,8 @@ import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, delay, getEnv } from '../utils.js'
import { zodToJsonSchema } from '../zod-to-json-schema.js'
// TODO: Deprioritizing this client for now because the API doesn't seem to be stable.
// TODO: Deprioritizing this client for now because the API doesn't seem to be
// stable.
export namespace firecrawl {
/**