📵

2024-06-03 02:09:12 -05:00 · 2024-06-03 02:09:12 -05:00 · 7731ca09bc
commit 7731ca09bc
--- a/src/services/exa-client.ts
+++ b/src/services/exa-client.ts
@@ -1,135 +1,160 @@
 import defaultKy, { type KyInstance } from 'ky'
+import { z } from 'zod'

+import { aiFunction, AIFunctionsProvider } from '../fns.js'
 import { assert, getEnv } from '../utils.js'

 export namespace exa {
-  /**
-   * Search options for performing a search query.
-   */
-  export type BaseSearchOptions = {
-    /** Number of search results to return. Default 10. Max 10 for basic plans. */
-    numResults?: number
-    /** List of domains to include in the search. */
-    includeDomains?: string[]
-    /** List of domains to exclude in the search. */
-    excludeDomains?: string[]
-    /** Start date for results based on crawl date. */
-    startCrawlDate?: string
-    /** End date for results based on crawl date. */
-    endCrawlDate?: string
-    /** Start date for results based on published date. */
-    startPublishedDate?: string
-    /** End date for results based on published date. */
-    endPublishedDate?: string
-    /** A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company. */
-    category?: string
-  }
+  export const TextContentsOptionsSchema = z.object({
+    maxCharacters: z
+      .number()
+      .optional()
+      .describe('The maximum number of characters to return.'),
+    includeHtmlTags: z
+      .boolean()
+      .optional()
+      .describe('If true, includes HTML tags in the returned text.')
+  })
+  export type TextContentsOptions = z.infer<typeof TextContentsOptionsSchema>

-  /**
-   * Search options for performing a search query.
-   */
-  export type RegularSearchOptions = BaseSearchOptions & {
-    /** If true, converts query to a Metaphor query. */
-    useAutoprompt?: boolean
-    /** Type of search, 'keyword' or 'neural'. */
-    type?: string
-  }
-
-  /**
-   * Options for finding similar links.
-   */
-  export type FindSimilarOptions = BaseSearchOptions & {
-    /** If true, excludes links from the base domain of the input. */
-    excludeSourceDomain?: boolean
-  }
-
-  /**
-   * Search options for performing a search query.
-   */
-  export type ContentsOptions = {
-    /** Options for retrieving text contents. */
-    text?: TextContentsOptions | true
-    /** Options for retrieving highlights. */
-    highlights?: HighlightsContentsOptions | true
-  }
-
-  /**
-   * Options for retrieving text from page.
-   */
-  export type TextContentsOptions = {
-    /** The maximum number of characters to return. */
-    maxCharacters?: number
-    /** If true, includes HTML tags in the returned text. Default: false */
-    includeHtmlTags?: boolean
-  }
-
-  /**
-   * Options for retrieving highlights from page.
-   * @typedef {Object} HighlightsContentsOptions
-   */
-  export type HighlightsContentsOptions = {
-    /** The query string to use for highlights search. */
-    query?: string
-    /** The number of sentences to return for each highlight. */
-    numSentences?: number
-    /** The number of highlights to return for each URL. */
-    highlightsPerUrl?: number
-  }
-
-  export type TextResponse = {
-    /** Text from page */
-    text: string
-  }
-
-  export type HighlightsResponse = {
-    /** The highlights as an array of strings. */
-    highlights: string[]
-    /** The corresponding scores as an array of floats, 0 to 1 */
-    highlightScores: number[]
-  }
-
-  export type Default<T extends {}, U> = [keyof T] extends [never] ? U : T
-
-  /**
-   * Depending on 'ContentsOptions', this yields either a 'TextResponse',
-   * a 'HighlightsResponse', both, or an empty object.
-   */
-  export type ContentsResultComponent<T extends ContentsOptions> = Default<
-    (T['text'] extends object | true ? TextResponse : {}) &
-      (T['highlights'] extends object | true ? HighlightsResponse : {}),
-    TextResponse
+  export const HighlightsContentsOptionsSchema = z.object({
+    query: z
+      .string()
+      .optional()
+      .describe('The query string to use for highlights search.'),
+    numSentences: z
+      .number()
+      .optional()
+      .describe('The number of sentences to return for each highlight.'),
+    highlightsPerUrl: z
+      .number()
+      .optional()
+      .describe('The number of highlights to return for each URL.')
+  })
+  export type HighlightsContentsOptions = z.infer<
+    typeof HighlightsContentsOptionsSchema
  >

+  export const ContentsOptionsSchema = z.object({
+    text: z.union([TextContentsOptionsSchema, z.literal(true)]).optional(),
+    highlights: z
+      .union([HighlightsContentsOptionsSchema, z.literal(true)])
+      .optional()
+  })
+  export type ContentsOptions = z.infer<typeof ContentsOptionsSchema>
+
+  export const BaseSearchOptionsSchema = z.object({
+    numResults: z
+      .number()
+      .optional()
+      .describe('Number of search results to return.'),
+    includeDomains: z
+      .array(z.string())
+      .optional()
+      .describe('List of domains to include in the search.'),
+    excludeDomains: z
+      .array(z.string())
+      .optional()
+      .describe('List of domains to exclude from the search.'),
+    startCrawlDate: z
+      .string()
+      .optional()
+      .describe('Start date for results based on crawl date.'),
+    endCrawlDate: z
+      .string()
+      .optional()
+      .describe('End date for results based on crawl date.'),
+    startPublishedDate: z
+      .string()
+      .optional()
+      .describe('Start date for results based on published date.'),
+    endPublishedDate: z
+      .string()
+      .optional()
+      .describe('End date for results based on published date.'),
+    category: z
+      .string()
+      .optional()
+      .describe(
+        'A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company.'
+      ),
+    contents: ContentsOptionsSchema.optional().describe(
+      'Whether to include the contents of the search results.'
+    )
+  })
+  export type BaseSearchOptions = z.infer<typeof BaseSearchOptionsSchema>
+
+  export const RegularSearchOptionsSchema = BaseSearchOptionsSchema.extend({
+    query: z.string().describe('search query'),
+    useAutoprompt: z.boolean().optional(),
+    type: z.enum(['keyword', 'neural', 'magic']).optional()
+  })
+  export type RegularSearchOptions = z.infer<typeof RegularSearchOptionsSchema>
+
+  export const FindSimilarOptionsSchema = BaseSearchOptionsSchema.extend({
+    url: z
+      .string()
+      .describe('The url for which you would like to find similar links'),
+    excludeSourceDomain: z
+      .boolean()
+      .optional()
+      .describe('If true, excludes links from the base domain of the input.')
+  })
+  export type FindSimilarOptions = z.infer<typeof FindSimilarOptionsSchema>
+
+  export const GetContentsOptionsSchema = ContentsOptionsSchema.extend({
+    ids: z
+      .array(z.string())
+      .nonempty()
+      .describe('Exa IDs of the documents to retrieve.')
+  })
+  export type GetContentsOptions = z.infer<typeof GetContentsOptionsSchema>
+
  /**
   * Represents a search result object.
   */
-  export type SearchResult<T extends ContentsOptions = ContentsOptions> = {
+  export type SearchResult = {
    /** The title of the search result. */
    title: string | null
+
    /** The URL of the search result. */
    url: string
+
    /** The estimated creation date of the content. */
    publishedDate?: string
+
    /** The author of the content, if available. */
    author?: string
+
    /** Similarity score between the query/url and the result. */
    score?: number
-    /** The temporary ID for the document. */
+
+    /** The temporary Exa ID for the document. */
    id: string
-  } & ContentsResultComponent<T>
+
+    /** Text from page */
+    text?: string
+
+    /** The highlights as an array of strings. */
+    highlights?: string[]
+
+    /** The corresponding scores as an array of floats, 0 to 1 */
+    highlightScores?: number[]
+  }

  /**
   * Represents a search response object.
   */
-  export type SearchResponse<T extends ContentsOptions = ContentsOptions> = {
+  export type SearchResponse = {
    /** The list of search results. */
-    results: SearchResult<T>[]
+    results: SearchResult[]
+
    /** The autoprompt string, if applicable. */
    autopromptString?: string
  }
 }

-export class ExaClient {
+export class ExaClient extends AIFunctionsProvider {
  readonly apiKey: string
  readonly apiBaseUrl: string
  readonly ky: KyInstance
@@ -147,6 +172,7 @@ export class ExaClient {
      apiKey,
      'ExaClient missing required "apiKey" (defaults to "EXA_API_KEY")'
    )
+    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl
@@ -162,103 +188,52 @@ export class ExaClient {
  /**
   * Performs an Exa search for the given query.
   */
-  async search(opts: { query: string } & exa.RegularSearchOptions) {
-    return this.ky.get('search', { json: opts }).json<exa.SearchResponse>()
-  }
+  @aiFunction({
+    name: 'exa_search',
+    description: 'Search the web for the given query.',
+    inputSchema: exa.RegularSearchOptionsSchema
+  })
+  async search(queryOrOpts: string | exa.RegularSearchOptions) {
+    const json =
+      typeof queryOrOpts === 'string' ? { query: queryOrOpts } : queryOrOpts

-  /**
-   * Performs a search with a Exa prompt-engineered query and returns the
-   * contents of the documents.
-   */
-  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
-    query,
-    text,
-    highlights,
-    ...rest
-  }: { query: string } & exa.RegularSearchOptions & T) {
-    return this.ky
-      .post('search', {
-        json: {
-          query,
-          contents:
-            !text && !highlights
-              ? { text: true }
-              : {
-                  ...(text ? { text } : {}),
-                  ...(highlights ? { highlights } : {})
-                },
-          ...rest
-        }
-      })
-      .json<exa.SearchResponse<T>>()
+    return this.ky.post('search', { json }).json<exa.SearchResponse>()
  }

  /**
   * Finds similar links to the provided URL.
   */
-  async findSimilar(opts: { url: string } & exa.FindSimilarOptions) {
+  @aiFunction({
+    name: 'exa_find_similar',
+    description: 'Find similar links to the provided URL.',
+    inputSchema: exa.FindSimilarOptionsSchema
+  })
+  async findSimilar(opts: exa.FindSimilarOptions) {
    return this.ky
      .post('findSimilar', { json: opts })
      .json<exa.SearchResponse>()
  }

  /**
-   * Finds similar links to the provided URL and returns the contents of the
-   * documents.
+   * Retrieves contents of documents based on a list of Exa document IDs.
   */
-  async findSimilarAndContents<
-    T extends exa.ContentsOptions = exa.ContentsOptions
-  >({
-    url,
-    text,
-    highlights,
-    ...rest
-  }: { url: string } & exa.FindSimilarOptions & T) {
-    return this.ky
-      .post('findSimilar', {
-        json: {
-          url,
-          contents:
-            !text && !highlights
-              ? { text: true }
-              : {
-                  ...(text ? { text } : {}),
-                  ...(highlights ? { highlights } : {})
-                },
-          ...rest
-        }
-      })
-      .json<exa.SearchResponse<T>>()
-  }
-
-  /**
-   * Retrieves contents of documents based on a list of document IDs.
-   */
-  async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
-    ids,
-    ...opts
-  }: { ids: string | string[] | exa.SearchResult[] } & T) {
-    let requestIds: string[]
-
-    if (typeof ids === 'string') {
-      requestIds = [ids]
-    } else if (typeof ids[0] === 'string') {
-      requestIds = ids as string[]
-    } else {
-      requestIds = (ids as exa.SearchResult[]).map((result) => result.id)
-    }
-
-    if (ids.length === 0) {
-      throw new Error('Must provide at least one ID')
-    }
+  @aiFunction({
+    name: 'exa_get_contents',
+    description:
+      'Retrieve contents of documents based on a list of Exa document IDs.',
+    inputSchema: exa.GetContentsOptionsSchema
+  })
+  async getContents({ ids, ...opts }: exa.GetContentsOptions) {
+    const documentIDs = Array.isArray(ids) ? ids : [ids]
+    assert(documentIDs.length, 'Must provide at least one document ID')

    return this.ky
      .post('contents', {
        json: {
          ...opts,
-          ids: requestIds
+          ids: documentIDs
        }
      })
-      .json<exa.SearchResponse<T>>()
+      .json<exa.SearchResponse>()
  }
 }
--- a/src/services/firecrawl-client.ts
+++ b/src/services/firecrawl-client.ts
@@ -5,7 +5,8 @@ import { aiFunction, AIFunctionsProvider } from '../fns.js'
 import { assert, delay, getEnv } from '../utils.js'
 import { zodToJsonSchema } from '../zod-to-json-schema.js'

-// TODO: Deprioritizing this client for now because the API doesn't seem to be stable.
+// TODO: Deprioritizing this client for now because the API doesn't seem to be
+// stable.

 export namespace firecrawl {
  /**