pull/643/head^2
Travis Fischer 2024-05-26 17:07:47 -05:00
rodzic 2ea18abab2
commit ca31b560a8
4 zmienionych plików z 232 dodań i 27 usunięć

Wyświetl plik

@ -23,6 +23,7 @@
- dexa
- diffbot
- exa
- firecrawl
- people data labs
- perigon
- predict leads
@ -43,11 +44,9 @@
- agentic
- walter
- services
- exa - need to update to correct format
- wolfram alpha
- wikipedia
- midjourney
- firecrawl
- unstructured
- pull from [langchain](https://github.com/langchain-ai/langchainjs/tree/main/langchain)
- pull from other libs

Wyświetl plik

@ -156,24 +156,23 @@ export class ExaClient {
})
}
async search(query: string, options?: exa.RegularSearchOptions) {
return this.ky
.post('search', { json: { ...options, query } })
.json<exa.SearchResponse>()
/**
* Performs an Exa search for the given query.
*/
async search(opts: { query: string } & exa.RegularSearchOptions) {
return this.ky.post('search', { json: opts }).json<exa.SearchResponse>()
}
/**
* Performs a search with an Exa prompt-engineered query and returns the
* contents of the documents.
*
* @param {string} query - The query string.
*/
async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>(
query: string,
options?: exa.RegularSearchOptions & T
) {
const { text, highlights, ...rest } = options || {}
async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
query,
text,
highlights,
...rest
}: { query: string } & exa.RegularSearchOptions & T) {
return this.ky
.post('search', {
json: {
@ -193,12 +192,10 @@ export class ExaClient {
/**
* Finds similar links to the provided URL.
*
* @param {string} url - The URL for which to find similar links.
*/
async findSimilar(url: string, options?: exa.FindSimilarOptions) {
async findSimilar(opts: { url: string } & exa.FindSimilarOptions) {
return this.ky
.post('findSimilar', { json: { url, ...options } })
.post('findSimilar', { json: opts })
.json<exa.SearchResponse>()
}
@ -210,9 +207,12 @@ export class ExaClient {
*/
async findSimilarAndContents<
T extends exa.ContentsOptions = exa.ContentsOptions
>(url: string, options?: exa.FindSimilarOptions & T) {
const { text, highlights, ...rest } = options || {}
>({
url,
text,
highlights,
...rest
}: { url: string } & exa.FindSimilarOptions & T) {
return this.ky
.post('findSimilar', {
json: {
@ -235,10 +235,10 @@ export class ExaClient {
*
* @param {string | string[] | SearchResult[]} ids - An array of document IDs.
*/
async getContents<T extends exa.ContentsOptions>(
ids: string | string[] | exa.SearchResult[],
options?: T
) {
async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
ids,
...opts
}: { ids: string | string[] | exa.SearchResult[] } & T) {
let requestIds: string[]
if (typeof ids === 'string') {
@ -256,8 +256,8 @@ export class ExaClient {
return this.ky
.post('contents', {
json: {
ids: requestIds,
...options
...opts,
ids: requestIds
}
})
.json<exa.SearchResponse<T>>()

Wyświetl plik

@ -0,0 +1,205 @@
import defaultKy, { type KyInstance } from 'ky'
import z from 'zod'
import { assert, delay, getEnv } from '../utils.js'
import { zodToJsonSchema } from '../zod-to-json-schema.js'
export namespace firecrawl {
  /**
   * Loosely-typed bag of request parameters accepted by the Firecrawl
   * endpoints. Arbitrary extra keys are allowed and are forwarded as part of
   * the JSON request body by `FirecrawlClient`.
   */
  export interface Params {
    /**
     * Optional structured-extraction settings. When `extractionSchema` is a
     * zod schema, `FirecrawlClient.scrapeUrl` converts it to JSON schema
     * before sending the request.
     */
    extractorOptions?: {
      mode?: 'llm-extraction'
      extractionPrompt?: string
      extractionSchema: z.ZodSchema | any
    }

    /** Any additional endpoint-specific parameters. */
    [key: string]: any
  }

  /**
   * Shape of the JSON body returned by a scrape request.
   */
  export interface ScrapeResponse {
    success: boolean
    error?: string
    data?: any
  }

  /**
   * Shape of the JSON body returned by a search request.
   */
  export interface SearchResponse {
    success: boolean
    error?: string
    data?: any
  }

  /**
   * Shape of the JSON body returned when a crawl is started.
   */
  export interface CrawlResponse {
    success: boolean
    error?: string

    /** Identifier later used to poll the crawl job's status. */
    jobId?: string
    data?: any
  }

  /**
   * Shape of the JSON body returned when polling a crawl job.
   */
  export interface JobStatusResponse {
    success: boolean
    error?: string

    /**
     * Current job state. `FirecrawlClient.waitForCrawlJob` treats
     * `'completed'` as done and `'active'`, `'paused'`, `'pending'` and
     * `'queued'` as still in progress.
     */
    status: string
    jobId?: string
    data?: any
  }
}
/**
 * Thin HTTP client for the Firecrawl web scraping / crawling API.
 *
 * Every request is sent through a `ky` instance that is pre-configured with
 * the API base URL and a `Bearer` authorization header.
 *
 * @see https://www.firecrawl.dev
 */
export class FirecrawlClient {
  readonly ky: KyInstance
  readonly apiKey: string
  readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - Firecrawl API key. Defaults to the
   * `FIRECRAWL_API_KEY` environment variable.
   * @param opts.apiBaseUrl - Base URL of the API. Defaults to the
   * `FIRECRAWL_API_BASE_URL` environment variable, falling back to
   * `https://api.firecrawl.dev`.
   * @param opts.ky - Optional `ky` instance to extend (useful for injecting
   * custom hooks or a mock).
   * @throws If `apiKey` or `apiBaseUrl` resolves to a falsy value.
   */
  constructor({
    apiKey = getEnv('FIRECRAWL_API_KEY'),
    apiBaseUrl = getEnv('FIRECRAWL_API_BASE_URL') ??
      'https://api.firecrawl.dev',
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'FirecrawlClient missing required "apiKey" (defaults to "FIRECRAWL_API_KEY")'
    )
    assert(
      apiBaseUrl,
      'FirecrawlClient missing required "apiBaseUrl" (defaults to "FIRECRAWL_API_BASE_URL")'
    )

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    this.ky = ky.extend({
      prefixUrl: apiBaseUrl,
      headers: {
        Authorization: `Bearer ${this.apiKey}`
      }
    })
  }

  /**
   * Scrapes a single URL.
   *
   * If `extractorOptions.extractionSchema` is provided, the request is sent
   * with `mode: 'llm-extraction'` by default (an explicit `mode` in
   * `extractorOptions` takes precedence), and a zod schema is converted to
   * JSON schema before sending.
   *
   * @param opts - The `url` to scrape plus any additional request parameters.
   * @returns The parsed JSON response body.
   */
  async scrapeUrl(
    opts: {
      url: string
    } & firecrawl.Params
  ): Promise<firecrawl.ScrapeResponse> {
    const json = {
      ...opts
    }

    if (opts?.extractorOptions?.extractionSchema) {
      let schema = opts.extractorOptions.extractionSchema
      if (schema instanceof z.ZodSchema) {
        schema = zodToJsonSchema(schema)
      }

      json.extractorOptions = {
        mode: 'llm-extraction',
        ...opts.extractorOptions,
        extractionSchema: schema
      }
    }

    // NOTE: the v0 REST endpoint is `scrape`; `scrapeUrl` is only the name of
    // the SDK-style method, not a route on the API.
    return this.ky
      .post('v0/scrape', { json })
      .json<firecrawl.ScrapeResponse>()
  }

  /**
   * Runs a search.
   *
   * @param opts - The `query` plus any additional request parameters; sent
   * as-is as the JSON request body.
   * @returns The parsed JSON response body.
   */
  async search(
    opts: {
      query: string
    } & firecrawl.Params
  ): Promise<firecrawl.SearchResponse> {
    return this.ky
      .post('v0/search', { json: opts })
      .json<firecrawl.SearchResponse>()
  }

  /**
   * Starts a crawl job and, by default, waits for it to finish.
   *
   * @param opts.url - URL to crawl (sent in the request body together with
   * any additional parameters).
   * @param opts.waitUntilDone - When `true` (default), polls the job until it
   * completes and resolves with the final job status. When `false`, resolves
   * with the initial crawl response.
   * @param opts.timeoutMs - Used both as the HTTP timeout of the initial
   * request and as the polling timeout when waiting. Defaults to 30 seconds.
   * @param opts.idempotencyKey - If set, sent as the `x-idempotency-key`
   * request header.
   * @throws If the response does not contain a `jobId`, or if waiting for the
   * job fails or times out.
   */
  async crawlUrl({
    waitUntilDone = true,
    timeoutMs = 30_000,
    idempotencyKey,
    ...params
  }: {
    url: string
    waitUntilDone?: boolean
    timeoutMs?: number
    idempotencyKey?: string
  } & firecrawl.Params): Promise<
    firecrawl.CrawlResponse | firecrawl.JobStatusResponse
  > {
    const res = await this.ky
      .post('v0/crawl', {
        json: params,
        timeout: timeoutMs,
        headers: idempotencyKey
          ? {
              'x-idempotency-key': idempotencyKey
            }
          : undefined
      })
      .json<firecrawl.CrawlResponse>()

    assert(res.jobId, 'Firecrawl crawl response is missing "jobId"')

    if (waitUntilDone) {
      return this.waitForCrawlJob({ jobId: res.jobId, timeoutMs })
    }

    return res
  }

  /**
   * Fetches the current status of a crawl job.
   *
   * @param jobId - Identifier of the crawl job.
   * @throws If `jobId` is falsy.
   */
  async checkCrawlStatus(jobId: string): Promise<firecrawl.JobStatusResponse> {
    assert(jobId, 'FirecrawlClient.checkCrawlStatus missing required "jobId"')

    // Encode the id so that it can never alter the request path.
    return this.ky
      .get(`v0/crawl/status/${encodeURIComponent(jobId)}`)
      .json<firecrawl.JobStatusResponse>()
  }

  /**
   * Polls a crawl job once per second until it completes.
   *
   * @param opts.jobId - Identifier of the crawl job.
   * @param opts.timeoutMs - Maximum time to keep polling. The deadline is
   * checked after each status request, so the total wait may exceed it by the
   * duration of one request. Defaults to 30 seconds.
   * @returns The job status response once its status is `'completed'`.
   * @throws If `jobId` is falsy, if the job reports a status other than
   * `'completed'`, `'active'`, `'paused'`, `'pending'` or `'queued'`, or if
   * the timeout elapses first.
   */
  async waitForCrawlJob({
    jobId,
    timeoutMs = 30_000
  }: {
    jobId: string
    timeoutMs?: number
  }): Promise<firecrawl.JobStatusResponse> {
    assert(jobId, 'FirecrawlClient.waitForCrawlJob missing required "jobId"')

    const start = Date.now()

    do {
      const res = await this.checkCrawlStatus(jobId)
      if (res.status === 'completed') {
        return res
      }

      // Any status outside the known in-progress set is treated as terminal.
      if (!['active', 'paused', 'pending', 'queued'].includes(res.status)) {
        throw new Error(
          `Crawl job "${jobId}" failed or was stopped. Status: ${res.status}`
        )
      }

      if (Date.now() - start > timeoutMs) {
        throw new Error(
          `Timeout waiting for crawl job "${jobId}" to complete: ${res.status}`
        )
      }

      await delay(1000)
    } while (true)
  }
}

Wyświetl plik

@ -2,6 +2,7 @@ export * from './clearbit-client.js'
export * from './dexa-client.js'
export * from './diffbot-client.js'
export * from './exa-client.js'
export * from './firecrawl-client.js'
export * from './people-data-labs-client.js'
export * from './perigon-client.js'
export * from './predict-leads-client.js'