From 7731ca09bcf41b11e14eb378f84a3d3c84060e77 Mon Sep 17 00:00:00 2001 From: Travis Fischer Date: Mon, 3 Jun 2024 02:09:12 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/services/exa-client.ts | 331 ++++++++++++++----------------- src/services/firecrawl-client.ts | 3 +- 2 files changed, 155 insertions(+), 179 deletions(-) diff --git a/src/services/exa-client.ts b/src/services/exa-client.ts index 76d0495..b61a251 100644 --- a/src/services/exa-client.ts +++ b/src/services/exa-client.ts @@ -1,135 +1,160 @@ import defaultKy, { type KyInstance } from 'ky' +import { z } from 'zod' +import { aiFunction, AIFunctionsProvider } from '../fns.js' import { assert, getEnv } from '../utils.js' export namespace exa { - /** - * Search options for performing a search query. - */ - export type BaseSearchOptions = { - /** Number of search results to return. Default 10. Max 10 for basic plans. */ - numResults?: number - /** List of domains to include in the search. */ - includeDomains?: string[] - /** List of domains to exclude in the search. */ - excludeDomains?: string[] - /** Start date for results based on crawl date. */ - startCrawlDate?: string - /** End date for results based on crawl date. */ - endCrawlDate?: string - /** Start date for results based on published date. */ - startPublishedDate?: string - /** End date for results based on published date. */ - endPublishedDate?: string - /** A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company. */ - category?: string - } + export const TextContentsOptionsSchema = z.object({ + maxCharacters: z + .number() + .optional() + .describe('The maximum number of characters to return.'), + includeHtmlTags: z + .boolean() + .optional() + .describe('If true, includes HTML tags in the returned text.') + }) + export type TextContentsOptions = z.infer - /** - * Search options for performing a search query. - */ - export type RegularSearchOptions = BaseSearchOptions & { - /** If true, converts query to a Metaphor query. */ - useAutoprompt?: boolean - /** Type of search, 'keyword' or 'neural'. */ - type?: string - } - - /** - * Options for finding similar links. - */ - export type FindSimilarOptions = BaseSearchOptions & { - /** If true, excludes links from the base domain of the input. */ - excludeSourceDomain?: boolean - } - - /** - * Search options for performing a search query. - */ - export type ContentsOptions = { - /** Options for retrieving text contents. */ - text?: TextContentsOptions | true - /** Options for retrieving highlights. */ - highlights?: HighlightsContentsOptions | true - } - - /** - * Options for retrieving text from page. - */ - export type TextContentsOptions = { - /** The maximum number of characters to return. */ - maxCharacters?: number - /** If true, includes HTML tags in the returned text. Default: false */ - includeHtmlTags?: boolean - } - - /** - * Options for retrieving highlights from page. - * @typedef {Object} HighlightsContentsOptions - */ - export type HighlightsContentsOptions = { - /** The query string to use for highlights search. */ - query?: string - /** The number of sentences to return for each highlight. */ - numSentences?: number - /** The number of highlights to return for each URL. */ - highlightsPerUrl?: number - } - - export type TextResponse = { - /** Text from page */ - text: string - } - - export type HighlightsResponse = { - /** The highlights as an array of strings. */ - highlights: string[] - /** The corresponding scores as an array of floats, 0 to 1 */ - highlightScores: number[] - } - - export type Default = [keyof T] extends [never] ? U : T - - /** - * Depending on 'ContentsOptions', this yields either a 'TextResponse', - * a 'HighlightsResponse', both, or an empty object. - */ - export type ContentsResultComponent = Default< - (T['text'] extends object | true ? TextResponse : {}) & - (T['highlights'] extends object | true ? HighlightsResponse : {}), - TextResponse + export const HighlightsContentsOptionsSchema = z.object({ + query: z + .string() + .optional() + .describe('The query string to use for highlights search.'), + numSentences: z + .number() + .optional() + .describe('The number of sentences to return for each highlight.'), + highlightsPerUrl: z + .number() + .optional() + .describe('The number of highlights to return for each URL.') + }) + export type HighlightsContentsOptions = z.infer< + typeof HighlightsContentsOptionsSchema > + export const ContentsOptionsSchema = z.object({ + text: z.union([TextContentsOptionsSchema, z.literal(true)]).optional(), + highlights: z + .union([HighlightsContentsOptionsSchema, z.literal(true)]) + .optional() + }) + export type ContentsOptions = z.infer + + export const BaseSearchOptionsSchema = z.object({ + numResults: z + .number() + .optional() + .describe('Number of search results to return.'), + includeDomains: z + .array(z.string()) + .optional() + .describe('List of domains to include in the search.'), + excludeDomains: z + .array(z.string()) + .optional() + .describe('List of domains to exclude from the search.'), + startCrawlDate: z + .string() + .optional() + .describe('Start date for results based on crawl date.'), + endCrawlDate: z + .string() + .optional() + .describe('End date for results based on crawl date.'), + startPublishedDate: z + .string() + .optional() + .describe('Start date for results based on published date.'), + endPublishedDate: z + .string() + .optional() + .describe('End date for results based on published date.'), + category: z + .string() + .optional() + .describe( + 'A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company.' + ), + contents: ContentsOptionsSchema.optional().describe( + 'Whether to include the contents of the search results.' + ) + }) + export type BaseSearchOptions = z.infer + + export const RegularSearchOptionsSchema = BaseSearchOptionsSchema.extend({ + query: z.string().describe('search query'), + useAutoprompt: z.boolean().optional(), + type: z.enum(['keyword', 'neural', 'magic']).optional() + }) + export type RegularSearchOptions = z.infer + + export const FindSimilarOptionsSchema = BaseSearchOptionsSchema.extend({ + url: z + .string() + .describe('The url for which you would like to find similar links'), + excludeSourceDomain: z + .boolean() + .optional() + .describe('If true, excludes links from the base domain of the input.') + }) + export type FindSimilarOptions = z.infer + + export const GetContentsOptionsSchema = ContentsOptionsSchema.extend({ + ids: z + .array(z.string()) + .nonempty() + .describe('Exa IDs of the documents to retrieve.') + }) + export type GetContentsOptions = z.infer + /** * Represents a search result object. */ - export type SearchResult = { + export type SearchResult = { /** The title of the search result. */ title: string | null + /** The URL of the search result. */ url: string + /** The estimated creation date of the content. */ publishedDate?: string + /** The author of the content, if available. */ author?: string + /** Similarity score between the query/url and the result. */ score?: number - /** The temporary ID for the document. */ + + /** The temporary Exa ID for the document. */ id: string - } & ContentsResultComponent + + /** Text from page */ + text?: string + + /** The highlights as an array of strings. */ + highlights?: string[] + + /** The corresponding scores as an array of floats, 0 to 1 */ + highlightScores?: number[] + } /** * Represents a search response object. */ - export type SearchResponse = { + export type SearchResponse = { /** The list of search results. */ - results: SearchResult[] + results: SearchResult[] + /** The autoprompt string, if applicable. */ autopromptString?: string } } -export class ExaClient { +export class ExaClient extends AIFunctionsProvider { readonly apiKey: string readonly apiBaseUrl: string readonly ky: KyInstance @@ -147,6 +172,7 @@ export class ExaClient { apiKey, 'ExaClient missing required "apiKey" (defaults to "EXA_API_KEY")' ) + super() this.apiKey = apiKey this.apiBaseUrl = apiBaseUrl @@ -162,103 +188,52 @@ export class ExaClient { /** * Performs an Exa search for the given query. */ - async search(opts: { query: string } & exa.RegularSearchOptions) { - return this.ky.get('search', { json: opts }).json() - } + @aiFunction({ + name: 'exa_search', + description: 'Search the web for the given query.', + inputSchema: exa.RegularSearchOptionsSchema + }) + async search(queryOrOpts: string | exa.RegularSearchOptions) { + const json = + typeof queryOrOpts === 'string' ? { query: queryOrOpts } : queryOrOpts - /** - * Performs a search with a Exa prompt-engineered query and returns the - * contents of the documents. - */ - async searchAndContents({ - query, - text, - highlights, - ...rest - }: { query: string } & exa.RegularSearchOptions & T) { - return this.ky - .post('search', { - json: { - query, - contents: - !text && !highlights - ? { text: true } - : { - ...(text ? { text } : {}), - ...(highlights ? { highlights } : {}) - }, - ...rest - } - }) - .json>() + return this.ky.post('search', { json }).json() } /** * Finds similar links to the provided URL. */ - async findSimilar(opts: { url: string } & exa.FindSimilarOptions) { + @aiFunction({ + name: 'exa_find_similar', + description: 'Find similar links to the provided URL.', + inputSchema: exa.FindSimilarOptionsSchema + }) + async findSimilar(opts: exa.FindSimilarOptions) { return this.ky .post('findSimilar', { json: opts }) .json() } /** - * Finds similar links to the provided URL and returns the contents of the - * documents. + * Retrieves contents of documents based on a list of Exa document IDs. */ - async findSimilarAndContents< - T extends exa.ContentsOptions = exa.ContentsOptions - >({ - url, - text, - highlights, - ...rest - }: { url: string } & exa.FindSimilarOptions & T) { - return this.ky - .post('findSimilar', { - json: { - url, - contents: - !text && !highlights - ? { text: true } - : { - ...(text ? { text } : {}), - ...(highlights ? { highlights } : {}) - }, - ...rest - } - }) - .json>() - } - - /** - * Retrieves contents of documents based on a list of document IDs. - */ - async getContents({ - ids, - ...opts - }: { ids: string | string[] | exa.SearchResult[] } & T) { - let requestIds: string[] - - if (typeof ids === 'string') { - requestIds = [ids] - } else if (typeof ids[0] === 'string') { - requestIds = ids as string[] - } else { - requestIds = (ids as exa.SearchResult[]).map((result) => result.id) - } - - if (ids.length === 0) { - throw new Error('Must provide at least one ID') - } + @aiFunction({ + name: 'exa_get_contents', + description: + 'Retrieve contents of documents based on a list of Exa document IDs.', + inputSchema: exa.GetContentsOptionsSchema + }) + async getContents({ ids, ...opts }: exa.GetContentsOptions) { + const documentIDs = Array.isArray(ids) ? ids : [ids] + assert(documentIDs.length, 'Must provide at least one document ID') return this.ky .post('contents', { json: { ...opts, - ids: requestIds + ids: documentIDs } }) - .json>() + .json() } } diff --git a/src/services/firecrawl-client.ts b/src/services/firecrawl-client.ts index cd7157d..134d1d4 100644 --- a/src/services/firecrawl-client.ts +++ b/src/services/firecrawl-client.ts @@ -5,7 +5,8 @@ import { aiFunction, AIFunctionsProvider } from '../fns.js' import { assert, delay, getEnv } from '../utils.js' import { zodToJsonSchema } from '../zod-to-json-schema.js' -// TODO: Deprioritizing this client for now because the API doesn't seem to be stable. +// TODO: Deprioritizing this client for now because the API doesn't seem to be +// stable. export namespace firecrawl { /**