📵

2024-06-03 02:09:12 -05:00 · 2024-06-03 02:09:12 -05:00 · 7731ca09bc
commit 7731ca09bc
--- a/src/services/exa-client.ts
+++ b/src/services/exa-client.ts
@ -1,135 +1,160 @@
 import defaultKy, { type KyInstance } from 'ky'
 import { z } from 'zod'
 import { aiFunction, AIFunctionsProvider } from '../fns.js'
 import { assert, getEnv } from '../utils.js'
 export namespace exa {
-  /**
+  export const TextContentsOptionsSchema = z.object({
-   * Search options for performing a search query.
+    maxCharacters: z
-   */
+      .number()
-  export type BaseSearchOptions = {
+      .optional()
-    /** Number of search results to return. Default 10. Max 10 for basic plans. */
+      .describe('The maximum number of characters to return.'),
-    numResults?: number
+    includeHtmlTags: z
-    /** List of domains to include in the search. */
+      .boolean()
-    includeDomains?: string[]
+      .optional()
-    /** List of domains to exclude in the search. */
+      .describe('If true, includes HTML tags in the returned text.')
-    excludeDomains?: string[]
+  })
-    /** Start date for results based on crawl date. */
+  export type TextContentsOptions = z.infer<typeof TextContentsOptionsSchema>
    startCrawlDate?: string
    /** End date for results based on crawl date. */
    endCrawlDate?: string
    /** Start date for results based on published date. */
    startPublishedDate?: string
    /** End date for results based on published date. */
    endPublishedDate?: string
    /** A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company. */
    category?: string
  }
-  /**
+  export const HighlightsContentsOptionsSchema = z.object({
-   * Search options for performing a search query.
+    query: z
-   */
+      .string()
-  export type RegularSearchOptions = BaseSearchOptions & {
+      .optional()
-    /** If true, converts query to a Metaphor query. */
+      .describe('The query string to use for highlights search.'),
-    useAutoprompt?: boolean
+    numSentences: z
-    /** Type of search, 'keyword' or 'neural'. */
+      .number()
-    type?: string
+      .optional()
-  }
+      .describe('The number of sentences to return for each highlight.'),
-
+    highlightsPerUrl: z
-  /**
+      .number()
-   * Options for finding similar links.
+      .optional()
-   */
+      .describe('The number of highlights to return for each URL.')
-  export type FindSimilarOptions = BaseSearchOptions & {
+  })
-    /** If true, excludes links from the base domain of the input. */
+  export type HighlightsContentsOptions = z.infer<
-    excludeSourceDomain?: boolean
+    typeof HighlightsContentsOptionsSchema
  }
  /**
   * Options selecting which page contents to retrieve. Each field accepts
   * either an options object or the literal `true`, and both are optional.
   */
  export type ContentsOptions = {
    /** Options for retrieving text contents. */
    text?: TextContentsOptions | true
    /** Options for retrieving highlights. */
    highlights?: HighlightsContentsOptions | true
  }
  /**
   * Options for retrieving the text of a page. Every field is optional.
   */
  export type TextContentsOptions = {
    /** The maximum number of characters to return. */
    maxCharacters?: number
    /** If true, includes HTML tags in the returned text. Default: false */
    includeHtmlTags?: boolean
  }
  /**
   * Options for retrieving highlights from a page. Every field is optional.
   */
  export type HighlightsContentsOptions = {
    /** The query string to use for highlights search. */
    query?: string
    /** The number of sentences to return for each highlight. */
    numSentences?: number
    /** The number of highlights to return for each URL. */
    highlightsPerUrl?: number
  }
  /** Response component carrying the text of a page. */
  export type TextResponse = {
    /** Text from page */
    text: string
  }
  /** Response component carrying highlights and their scores. */
  export type HighlightsResponse = {
    /** The highlights as an array of strings. */
    highlights: string[]
    /** The corresponding scores as an array of floats, 0 to 1 */
    highlightScores: number[]
  }
  /**
   * Resolves to `U` when `T` has no keys (`keyof T` is `never`); otherwise
   * resolves to `T` unchanged.
   */
  export type Default<T extends {}, U> = [keyof T] extends [never] ? U : T
  /**
   * Maps a 'ContentsOptions' type to a response shape: 'TextResponse' when
   * `text` is an object or `true`, 'HighlightsResponse' when `highlights` is
   * an object or `true`, or the intersection of both. When neither matches,
   * the intersection has no keys and 'Default' falls back to 'TextResponse'.
   */
  export type ContentsResultComponent<T extends ContentsOptions> = Default<
    (T['text'] extends object | true ? TextResponse : {}) &
      (T['highlights'] extends object | true ? HighlightsResponse : {}),
    TextResponse
  >
  /**
   * Zod schema for the contents options: `text` and `highlights` are each
   * optional and accept either an options object or the literal `true`.
   */
  export const ContentsOptionsSchema = z.object({
    text: z.union([TextContentsOptionsSchema, z.literal(true)]).optional(),
    highlights: z
      .union([HighlightsContentsOptionsSchema, z.literal(true)])
      .optional()
  })
  /** Contents options type inferred from 'ContentsOptionsSchema'. */
  export type ContentsOptions = z.infer<typeof ContentsOptionsSchema>
  /**
   * Zod schema for the options shared by the search-style requests. Every
   * field is optional; 'RegularSearchOptionsSchema' and
   * 'FindSimilarOptionsSchema' extend it with their own required fields.
   */
  export const BaseSearchOptionsSchema = z.object({
    numResults: z
      .number()
      .optional()
      .describe('Number of search results to return.'),
    includeDomains: z
      .array(z.string())
      .optional()
      .describe('List of domains to include in the search.'),
    excludeDomains: z
      .array(z.string())
      .optional()
      .describe('List of domains to exclude from the search.'),
    startCrawlDate: z
      .string()
      .optional()
      .describe('Start date for results based on crawl date.'),
    endCrawlDate: z
      .string()
      .optional()
      .describe('End date for results based on crawl date.'),
    startPublishedDate: z
      .string()
      .optional()
      .describe('Start date for results based on published date.'),
    endPublishedDate: z
      .string()
      .optional()
      .describe('End date for results based on published date.'),
    category: z
      .string()
      .optional()
      .describe(
        'A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company.'
      ),
    // Nested contents options; validated by 'ContentsOptionsSchema'.
    contents: ContentsOptionsSchema.optional().describe(
      'Whether to include the contents of the search results.'
    )
  })
  /** Base search options type inferred from 'BaseSearchOptionsSchema'. */
  export type BaseSearchOptions = z.infer<typeof BaseSearchOptionsSchema>
  /**
   * Zod schema for a regular search: the base search options plus a required
   * `query` and the optional `useAutoprompt` and `type` fields.
   */
  export const RegularSearchOptionsSchema = BaseSearchOptionsSchema.extend({
    query: z.string().describe('search query'),
    // NOTE(review): no `.describe()` on the next two fields, unlike the
    // sibling schema fields — confirm whether that is intentional.
    useAutoprompt: z.boolean().optional(),
    // Restricts the search type to one of the three listed literals.
    type: z.enum(['keyword', 'neural', 'magic']).optional()
  })
  /** Regular search options type inferred from 'RegularSearchOptionsSchema'. */
  export type RegularSearchOptions = z.infer<typeof RegularSearchOptionsSchema>
  /**
   * Zod schema for a find-similar request: the base search options plus a
   * required `url` and an optional `excludeSourceDomain` flag.
   */
  export const FindSimilarOptionsSchema = BaseSearchOptionsSchema.extend({
    url: z
      .string()
      .describe('The url for which you would like to find similar links'),
    excludeSourceDomain: z
      .boolean()
      .optional()
      .describe('If true, excludes links from the base domain of the input.')
  })
  /** Find-similar options type inferred from 'FindSimilarOptionsSchema'. */
  export type FindSimilarOptions = z.infer<typeof FindSimilarOptionsSchema>
  /**
   * Zod schema for a get-contents request: the contents options plus a
   * required, non-empty `ids` array of strings.
   */
  export const GetContentsOptionsSchema = ContentsOptionsSchema.extend({
    ids: z
      .array(z.string())
      .nonempty()
      .describe('Exa IDs of the documents to retrieve.')
  })
  /** Get-contents options type inferred from 'GetContentsOptionsSchema'. */
  export type GetContentsOptions = z.infer<typeof GetContentsOptionsSchema>
  /**
   * Represents a search result object.
   */
-  export type SearchResult<T extends ContentsOptions = ContentsOptions> = {
+  export type SearchResult = {
    /** The title of the search result. */
    title: string | null
    /** The URL of the search result. */
    url: string
    /** The estimated creation date of the content. */
    publishedDate?: string
    /** The author of the content, if available. */
    author?: string
    /** Similarity score between the query/url and the result. */
    score?: number
-    /** The temporary ID for the document. */
+
    /** The temporary Exa ID for the document. */
    id: string
-  } & ContentsResultComponent<T>
+
    /** Text from page */
    text?: string
    /** The highlights as an array of strings. */
    highlights?: string[]
    /** The corresponding scores as an array of floats, 0 to 1 */
    highlightScores?: number[]
  }
  /**
   * Represents a search response object.
   */
-  export type SearchResponse<T extends ContentsOptions = ContentsOptions> = {
+  export type SearchResponse = {
    /** The list of search results. */
-    results: SearchResult<T>[]
+    results: SearchResult[]
    /** The autoprompt string, if applicable. */
    autopromptString?: string
  }
 }
-export class ExaClient {
+export class ExaClient extends AIFunctionsProvider {
  readonly apiKey: string
  readonly apiBaseUrl: string
  readonly ky: KyInstance
@ -147,6 +172,7 @@ export class ExaClient {
      apiKey,
      'ExaClient missing required "apiKey" (defaults to "EXA_API_KEY")'
    )
    super()
    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl
@ -162,103 +188,52 @@ export class ExaClient {
  /**
   * Performs an Exa search for the given query.
   */
-  async search(opts: { query: string } & exa.RegularSearchOptions) {
+  @aiFunction({
-    return this.ky.get('search', { json: opts }).json<exa.SearchResponse>()
+    name: 'exa_search',
-  }
+    description: 'Search the web for the given query.',
    inputSchema: exa.RegularSearchOptionsSchema
  })
  async search(queryOrOpts: string | exa.RegularSearchOptions) {
    const json =
      typeof queryOrOpts === 'string' ? { query: queryOrOpts } : queryOrOpts
-  /**
+    return this.ky.post('search', { json }).json<exa.SearchResponse>()
   * Performs a search with an Exa prompt-engineered query and returns the
   * contents of the documents.
   */
  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
    query,
    text,
    highlights,
    ...rest
  }: { query: string } & exa.RegularSearchOptions & T) {
    // POSTs to the 'search' endpoint and parses the JSON response body.
    return this.ky
      .post('search', {
        json: {
          query,
          // Request plain text when the caller asked for neither text nor
          // highlights; otherwise forward only the options that are truthy.
          contents:
            !text && !highlights
              ? { text: true }
              : {
                  ...(text ? { text } : {}),
                  ...(highlights ? { highlights } : {})
                },
          // Spread last, so any same-named key in `rest` overrides the
          // `query` and `contents` values built above.
          ...rest
        }
      })
      .json<exa.SearchResponse<T>>()
  }
  /**
   * Finds similar links to the provided URL.
   */
-  async findSimilar(opts: { url: string } & exa.FindSimilarOptions) {
+  @aiFunction({
    name: 'exa_find_similar',
    description: 'Find similar links to the provided URL.',
    inputSchema: exa.FindSimilarOptionsSchema
  })
  async findSimilar(opts: exa.FindSimilarOptions) {
    return this.ky
      .post('findSimilar', { json: opts })
      .json<exa.SearchResponse>()
  }
  /**
-   * Finds similar links to the provided URL and returns the contents of the
+   * Retrieves contents of documents based on a list of Exa document IDs.
   * documents.
   */
-  async findSimilarAndContents<
+  @aiFunction({
-    T extends exa.ContentsOptions = exa.ContentsOptions
+    name: 'exa_get_contents',
-  >({
+    description:
-    url,
+      'Retrieve contents of documents based on a list of Exa document IDs.',
-    text,
+    inputSchema: exa.GetContentsOptionsSchema
-    highlights,
+  })
-    ...rest
+  async getContents({ ids, ...opts }: exa.GetContentsOptions) {
-  }: { url: string } & exa.FindSimilarOptions & T) {
+    const documentIDs = Array.isArray(ids) ? ids : [ids]
-    return this.ky
+    assert(documentIDs.length, 'Must provide at least one document ID')
      .post('findSimilar', {
        json: {
          url,
          contents:
            !text && !highlights
              ? { text: true }
              : {
                  ...(text ? { text } : {}),
                  ...(highlights ? { highlights } : {})
                },
          ...rest
        }
      })
      .json<exa.SearchResponse<T>>()
  }
  /**
   * Retrieves contents of documents based on a list of document IDs.
   */
  async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
    ids,
    ...opts
  }: { ids: string | string[] | exa.SearchResult[] } & T) {
    let requestIds: string[]
    if (typeof ids === 'string') {
      requestIds = [ids]
    } else if (typeof ids[0] === 'string') {
      requestIds = ids as string[]
    } else {
      requestIds = (ids as exa.SearchResult[]).map((result) => result.id)
    }
    if (ids.length === 0) {
      throw new Error('Must provide at least one ID')
    }
    return this.ky
      .post('contents', {
        json: {
          ...opts,
-          ids: requestIds
+          ids: documentIDs
        }
      })
-      .json<exa.SearchResponse<T>>()
+      .json<exa.SearchResponse>()
  }
 }
--- a/src/services/firecrawl-client.ts
+++ b/src/services/firecrawl-client.ts
@ -5,7 +5,8 @@ import { aiFunction, AIFunctionsProvider } from '../fns.js'
 import { assert, delay, getEnv } from '../utils.js'
 import { zodToJsonSchema } from '../zod-to-json-schema.js'
-// TODO: Deprioritizing this client for now because the API doesn't seem to be stable.
+// TODO: Deprioritizing this client for now because the API doesn't seem to be
 // stable.
 export namespace firecrawl {
  /**