Mirror of https://github.com/transitive-bullshit/chatgpt-api
pull/643/head^2
parent 2ea18abab2
commit ca31b560a8
@@ -23,6 +23,7 @@
 - dexa
 - diffbot
 - exa
+- firecrawl
 - people data labs
 - perigon
 - predict leads
@@ -43,11 +44,9 @@
 - agentic
 - walter
 - services
-- exa - need to update to correct format
 - wolfram alpha
 - wikipedia
 - midjourney
-- firecrawl
 - unstructured
 - pull from [langchain](https://github.com/langchain-ai/langchainjs/tree/main/langchain)
 - pull from other libs
@@ -156,24 +156,23 @@ export class ExaClient {
     })
   }

-  async search(query: string, options?: exa.RegularSearchOptions) {
-    return this.ky
-      .post('search', { json: { ...options, query } })
-      .json<exa.SearchResponse>()
+  /**
+   * Performs an Exa search for the given query.
+   */
+  async search(opts: { query: string } & exa.RegularSearchOptions) {
+    return this.ky.post('search', { json: opts }).json<exa.SearchResponse>()
   }

   /**
    * Performs a search with a Exa prompt-engineered query and returns the
    * contents of the documents.
-   *
-   * @param {string} query - The query string.
    */
-  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>(
-    query: string,
-    options?: exa.RegularSearchOptions & T
-  ) {
-    const { text, highlights, ...rest } = options || {}
-
+  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
+    query,
+    text,
+    highlights,
+    ...rest
+  }: { query: string } & exa.RegularSearchOptions & T) {
     return this.ky
       .post('search', {
         json: {
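The net effect of this hunk is that `ExaClient` call sites move from positional arguments to a single flat options object. A minimal sketch of the new call shape, assuming a default-constructed client and using `numResults` as an illustrative `RegularSearchOptions` field:

```ts
const exa = new ExaClient() // assumes the API key is picked up from the environment

// Before: exa.search('latest AI papers', { numResults: 5 })
// After: one object; `query` rides alongside the search options
const res = await exa.search({ query: 'latest AI papers', numResults: 5 })

// `searchAndContents` folds `text` / `highlights` into the same object;
// the method destructures them out before building the request body
const docs = await exa.searchAndContents({ query: 'latest AI papers', text: true })
```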
@@ -193,12 +192,10 @@ export class ExaClient {

   /**
    * Finds similar links to the provided URL.
-   *
-   * @param {string} url - The URL for which to find similar links.
    */
-  async findSimilar(url: string, options?: exa.FindSimilarOptions) {
+  async findSimilar(opts: { url: string } & exa.FindSimilarOptions) {
     return this.ky
-      .post('findSimilar', { json: { url, ...options } })
+      .post('findSimilar', { json: opts })
       .json<exa.SearchResponse>()
   }

@@ -210,9 +207,12 @@ export class ExaClient {
    */
   async findSimilarAndContents<
     T extends exa.ContentsOptions = exa.ContentsOptions
-  >(url: string, options?: exa.FindSimilarOptions & T) {
-    const { text, highlights, ...rest } = options || {}
-
+  >({
+    url,
+    text,
+    highlights,
+    ...rest
+  }: { url: string } & exa.FindSimilarOptions & T) {
     return this.ky
       .post('findSimilar', {
         json: {
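The similarity endpoints get the same treatment; a hedged sketch of the new call shape:

```ts
// Before: exa.findSimilar('https://example.com', { numResults: 3 })
const similar = await exa.findSimilar({
  url: 'https://example.com',
  numResults: 3 // illustrative FindSimilarOptions field
})

// As with searchAndContents, `text` / `highlights` are destructured out of
// the single options object before the request body is assembled
const similarDocs = await exa.findSimilarAndContents({
  url: 'https://example.com',
  highlights: true
})
```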
@@ -235,10 +235,10 @@ export class ExaClient {
    *
    * @param {string | string[] | SearchResult[]} ids - An array of document IDs.
    */
-  async getContents<T extends exa.ContentsOptions>(
-    ids: string | string[] | exa.SearchResult[],
-    options?: T
-  ) {
+  async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
+    ids,
+    ...opts
+  }: { ids: string | string[] | exa.SearchResult[] } & T) {
     let requestIds: string[]

     if (typeof ids === 'string') {
@@ -256,8 +256,8 @@ export class ExaClient {
     return this.ky
       .post('contents', {
         json: {
-          ids: requestIds,
-          ...options
+          ...opts,
+          ids: requestIds
         }
       })
       .json<exa.SearchResponse<T>>()
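Note the reordering in the request body: spreading `...opts` before `ids: requestIds` ensures the normalized ID list can no longer be clobbered by a stray `ids` key left in the remaining options. A usage sketch with made-up document IDs:

```ts
// Accepts a single ID, an array of IDs, or previous SearchResult objects
const contents = await exa.getContents({
  ids: ['doc-1', 'doc-2'], // hypothetical IDs
  text: true
})
```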
@@ -0,0 +1,205 @@
+import defaultKy, { type KyInstance } from 'ky'
+import z from 'zod'
+
+import { assert, delay, getEnv } from '../utils.js'
+import { zodToJsonSchema } from '../zod-to-json-schema.js'
+
+export namespace firecrawl {
+  /**
+   * Generic parameter interface.
+   */
+  export interface Params {
+    [key: string]: any
+    extractorOptions?: {
+      extractionSchema: z.ZodSchema | any
+      mode?: 'llm-extraction'
+      extractionPrompt?: string
+    }
+  }
+
+  /**
+   * Response interface for scraping operations.
+   */
+  export interface ScrapeResponse {
+    success: boolean
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for searching operations.
+   */
+  export interface SearchResponse {
+    success: boolean
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for crawling operations.
+   */
+  export interface CrawlResponse {
+    success: boolean
+    jobId?: string
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for job status checks.
+   */
+  export interface JobStatusResponse {
+    success: boolean
+    status: string
+    jobId?: string
+    data?: any
+    error?: string
+  }
+}
+
+/**
+ * @see https://www.firecrawl.dev
+ */
+export class FirecrawlClient {
+  readonly ky: KyInstance
+  readonly apiKey: string
+  readonly apiBaseUrl: string
+
+  constructor({
+    apiKey = getEnv('FIRECRAWL_API_KEY'),
+    apiBaseUrl = getEnv('FIRECRAWL_API_BASE_URL') ??
+      'https://api.firecrawl.dev',
+    ky = defaultKy
+  }: {
+    apiKey?: string
+    apiBaseUrl?: string
+    ky?: KyInstance
+  } = {}) {
+    assert(
+      apiKey,
+      'FirecrawlClient missing required "apiKey" (defaults to "FIRECRAWL_API_KEY")'
+    )
+    assert(
+      apiBaseUrl,
+      'FirecrawlClient missing required "apiBaseUrl" (defaults to "FIRECRAWL_API_BASE_URL")'
+    )
+
+    this.apiKey = apiKey
+    this.apiBaseUrl = apiBaseUrl
+
+    this.ky = ky.extend({
+      prefixUrl: apiBaseUrl,
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`
+      }
+    })
+  }
+
+  async scrapeUrl(
+    opts: {
+      url: string
+    } & firecrawl.Params
+  ) {
+    const json = {
+      ...opts
+    }
+
+    if (opts?.extractorOptions?.extractionSchema) {
+      let schema = opts.extractorOptions.extractionSchema
+      if (schema instanceof z.ZodSchema) {
+        schema = zodToJsonSchema(schema)
+      }
+
+      json.extractorOptions = {
+        mode: 'llm-extraction',
+        ...opts.extractorOptions,
+        extractionSchema: schema
+      }
+    }
+
+    return this.ky
+      .post('v0/scrapeUrl', { json })
+      .json<firecrawl.ScrapeResponse>()
+  }
+
+  async search(
+    opts: {
+      query: string
+    } & firecrawl.Params
+  ) {
+    return this.ky
+      .post('v0/search', { json: opts })
+      .json<firecrawl.SearchResponse>()
+  }
+
+  async crawlUrl({
+    waitUntilDone = true,
+    timeoutMs = 30_000,
+    idempotencyKey,
+    ...params
+  }: {
+    url: string
+    waitUntilDone?: boolean
+    timeoutMs?: number
+    idempotencyKey?: string
+  } & firecrawl.Params) {
+    const res = await this.ky
+      .post('v0/crawl', {
+        json: params,
+        timeout: timeoutMs,
+        headers: idempotencyKey
+          ? {
+              'x-idempotency-key': idempotencyKey
+            }
+          : undefined
+      })
+      .json<firecrawl.CrawlResponse>()
+
+    assert(res.jobId)
+    if (waitUntilDone) {
+      return this.waitForCrawlJob({ jobId: res.jobId, timeoutMs })
+    }
+
+    return res
+  }
+
+  async checkCrawlStatus(jobId: string) {
+    assert(jobId)
+
+    return this.ky
+      .get(`v0/crawl/status/${jobId}`)
+      .json<firecrawl.JobStatusResponse>()
+  }
+
+  async waitForCrawlJob({
+    jobId,
+    timeoutMs = 30_000
+  }: {
+    jobId: string
+    timeoutMs?: number
+  }) {
+    assert(jobId)
+
+    const start = Date.now()
+    do {
+      const res = await this.checkCrawlStatus(jobId)
+      if (res.status === 'completed') {
+        return res
+      }
+
+      if (!['active', 'paused', 'pending', 'queued'].includes(res.status)) {
+        throw new Error(
+          `Crawl job "${jobId}" failed or was stopped. Status: ${res.status}`
+        )
+      }
+
+      if (Date.now() - start > timeoutMs) {
+        throw new Error(
+          `Timeout waiting for crawl job "${jobId}" to complete: ${res.status}`
+        )
+      }
+
+      await delay(1000)
+    } while (true)
+  }
+}
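A hedged usage sketch for the new `FirecrawlClient`: when a Zod schema is passed as `extractionSchema`, `scrapeUrl` converts it to JSON Schema via `zodToJsonSchema` and defaults `mode` to `'llm-extraction'`. The schema fields below are invented for illustration:

```ts
import z from 'zod'

const firecrawl = new FirecrawlClient() // reads FIRECRAWL_API_KEY from the environment

const res = await firecrawl.scrapeUrl({
  url: 'https://example.com',
  extractorOptions: {
    extractionSchema: z.object({
      title: z.string(),
      summary: z.string()
    })
  }
})

if (res.success) {
  console.log(res.data)
}
```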
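Crawling is asynchronous on Firecrawl's side, so `crawlUrl` defaults to polling: `waitForCrawlJob` re-checks the job status once per second until it reports `completed`, throws on a terminal failure status, and also throws once `timeoutMs` elapses. A sketch of both modes, with an illustrative URL and timeout:

```ts
// Blocking (default): resolves once the crawl job reports 'completed'
const crawl = await firecrawl.crawlUrl({
  url: 'https://example.com',
  timeoutMs: 60_000
})

// Fire-and-forget: grab the jobId and poll manually
const job = await firecrawl.crawlUrl({
  url: 'https://example.com',
  waitUntilDone: false
})
if (job.jobId) {
  const status = await firecrawl.checkCrawlStatus(job.jobId)
  console.log(status.status, status.data)
}
```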
@@ -2,6 +2,7 @@ export * from './clearbit-client.js'
 export * from './dexa-client.js'
 export * from './diffbot-client.js'
 export * from './exa-client.js'
+export * from './firecrawl-client.js'
 export * from './people-data-labs-client.js'
 export * from './perigon-client.js'
 export * from './predict-leads-client.js'
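With the barrel export in place, downstream code can pull the new client from the services index; a minimal sketch (the relative import path is an assumption):

```ts
import { FirecrawlClient } from './services/index.js' // path is an assumption

const client = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY })
```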