Mirror of https://github.com/transitive-bullshit/chatgpt-api
Branch: pull/643/head^2
Parent: 2ea18abab2
Commit: ca31b560a8
@@ -23,6 +23,7 @@
 - dexa
 - diffbot
 - exa
+- firecrawl
 - people data labs
 - perigon
 - predict leads
@ -43,11 +44,9 @@
|
|||
- agentic
|
||||
- walter
|
||||
- services
|
||||
- exa - need to update to correct format
|
||||
- wolfram alpha
|
||||
- wikipedia
|
||||
- midjourney
|
||||
- firecrawl
|
||||
- unstructured
|
||||
- pull from [langchain](https://github.com/langchain-ai/langchainjs/tree/main/langchain)
|
||||
- pull from other libs
|
||||
|
@@ -156,24 +156,23 @@ export class ExaClient {
     })
   }
 
-  async search(query: string, options?: exa.RegularSearchOptions) {
-    return this.ky
-      .post('search', { json: { ...options, query } })
-      .json<exa.SearchResponse>()
+  /**
+   * Performs an Exa search for the given query.
+   */
+  async search(opts: { query: string } & exa.RegularSearchOptions) {
+    return this.ky.post('search', { json: opts }).json<exa.SearchResponse>()
   }
 
   /**
    * Performs a search with an Exa prompt-engineered query and returns the
    * contents of the documents.
-   *
-   * @param {string} query - The query string.
    */
-  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>(
-    query: string,
-    options?: exa.RegularSearchOptions & T
-  ) {
-    const { text, highlights, ...rest } = options || {}
-
+  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
+    query,
+    text,
+    highlights,
+    ...rest
+  }: { query: string } & exa.RegularSearchOptions & T) {
     return this.ky
       .post('search', {
         json: {
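Note: both methods now take a single options object instead of positional arguments. A minimal usage sketch of the new signatures (illustrative only, not part of the diff; assumes EXA_API_KEY is set, and `numResults` is an assumed RegularSearchOptions field):

// Sketch of the new call style.
const exa = new ExaClient() // reads EXA_API_KEY from the environment

// The query now rides along in the same object as the search options.
const res = await exa.search({ query: 'foundation models', numResults: 5 })

// searchAndContents destructures `text` and `highlights` out of the same object.
const withText = await exa.searchAndContents({
  query: 'foundation models',
  text: true
})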
@@ -193,12 +192,10 @@ export class ExaClient {
 
   /**
    * Finds similar links to the provided URL.
-   *
-   * @param {string} url - The URL for which to find similar links.
    */
-  async findSimilar(url: string, options?: exa.FindSimilarOptions) {
+  async findSimilar(opts: { url: string } & exa.FindSimilarOptions) {
     return this.ky
-      .post('findSimilar', { json: { url, ...options } })
+      .post('findSimilar', { json: opts })
       .json<exa.SearchResponse>()
   }
 
@@ -210,9 +207,12 @@ export class ExaClient {
    */
   async findSimilarAndContents<
     T extends exa.ContentsOptions = exa.ContentsOptions
-  >(url: string, options?: exa.FindSimilarOptions & T) {
-    const { text, highlights, ...rest } = options || {}
-
+  >({
+    url,
+    text,
+    highlights,
+    ...rest
+  }: { url: string } & exa.FindSimilarOptions & T) {
     return this.ky
       .post('findSimilar', {
         json: {
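The same positional-to-object migration applies to findSimilar and findSimilarAndContents. A sketch, continuing the `exa` instance from the sketch above (the URL is hypothetical):

// Sketch (not part of the diff): the URL now lives inside the options object.
const similar = await exa.findSimilar({ url: 'https://example.com/post' })

const similarWithText = await exa.findSimilarAndContents({
  url: 'https://example.com/post',
  text: true
})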
@@ -235,10 +235,10 @@ export class ExaClient {
    *
    * @param {string | string[] | SearchResult[]} ids - An array of document IDs.
    */
-  async getContents<T extends exa.ContentsOptions>(
-    ids: string | string[] | exa.SearchResult[],
-    options?: T
-  ) {
+  async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
+    ids,
+    ...opts
+  }: { ids: string | string[] | exa.SearchResult[] } & T) {
     let requestIds: string[]
 
     if (typeof ids === 'string') {
@@ -256,8 +256,8 @@ export class ExaClient {
     return this.ky
       .post('contents', {
         json: {
-          ids: requestIds,
-          ...options
+          ...opts,
+          ids: requestIds
         }
       })
       .json<exa.SearchResponse<T>>()
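Beyond the object-style signature, the reordered spread in the request body is a small correctness fix: spreading `...opts` first means the normalized `requestIds` array can no longer be clobbered by the caller-supplied `ids`. A toy illustration of the spread semantics (plain JS, not part of the diff):

// Why spread order matters here:
const opts = { ids: 'doc-1', text: true }
const requestIds = ['doc-1']

const before = { ids: requestIds, ...opts } // => { ids: 'doc-1', text: true }   (caller's raw string wins)
const after = { ...opts, ids: requestIds }  // => { ids: ['doc-1'], text: true } (normalized array wins)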
@@ -0,0 +1,205 @@
+import defaultKy, { type KyInstance } from 'ky'
+import z from 'zod'
+
+import { assert, delay, getEnv } from '../utils.js'
+import { zodToJsonSchema } from '../zod-to-json-schema.js'
+
+export namespace firecrawl {
+  /**
+   * Generic parameter interface.
+   */
+  export interface Params {
+    [key: string]: any
+    extractorOptions?: {
+      extractionSchema: z.ZodSchema | any
+      mode?: 'llm-extraction'
+      extractionPrompt?: string
+    }
+  }
+
+  /**
+   * Response interface for scraping operations.
+   */
+  export interface ScrapeResponse {
+    success: boolean
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for searching operations.
+   */
+  export interface SearchResponse {
+    success: boolean
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for crawling operations.
+   */
+  export interface CrawlResponse {
+    success: boolean
+    jobId?: string
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for job status checks.
+   */
+  export interface JobStatusResponse {
+    success: boolean
+    status: string
+    jobId?: string
+    data?: any
+    error?: string
+  }
+}
+
+/**
+ * @see https://www.firecrawl.dev
+ */
+export class FirecrawlClient {
+  readonly ky: KyInstance
+  readonly apiKey: string
+  readonly apiBaseUrl: string
+
+  constructor({
+    apiKey = getEnv('FIRECRAWL_API_KEY'),
+    apiBaseUrl = getEnv('FIRECRAWL_API_BASE_URL') ??
+      'https://api.firecrawl.dev',
+    ky = defaultKy
+  }: {
+    apiKey?: string
+    apiBaseUrl?: string
+    ky?: KyInstance
+  } = {}) {
+    assert(
+      apiKey,
+      'FirecrawlClient missing required "apiKey" (defaults to "FIRECRAWL_API_KEY")'
+    )
+    assert(
+      apiBaseUrl,
+      'FirecrawlClient missing required "apiBaseUrl" (defaults to "FIRECRAWL_API_BASE_URL")'
+    )
+
+    this.apiKey = apiKey
+    this.apiBaseUrl = apiBaseUrl
+
+    this.ky = ky.extend({
+      prefixUrl: apiBaseUrl,
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`
+      }
+    })
+  }
+
+  async scrapeUrl(
+    opts: {
+      url: string
+    } & firecrawl.Params
+  ) {
+    const json = {
+      ...opts
+    }
+
+    if (opts?.extractorOptions?.extractionSchema) {
+      let schema = opts.extractorOptions.extractionSchema
+      if (schema instanceof z.ZodSchema) {
+        schema = zodToJsonSchema(schema)
+      }
+
+      json.extractorOptions = {
+        mode: 'llm-extraction',
+        ...opts.extractorOptions,
+        extractionSchema: schema
+      }
+    }
+
+    return this.ky
+      .post('v0/scrapeUrl', { json })
+      .json<firecrawl.ScrapeResponse>()
+  }
+
+  async search(
+    opts: {
+      query: string
+    } & firecrawl.Params
+  ) {
+    return this.ky
+      .post('v0/search', { json: opts })
+      .json<firecrawl.SearchResponse>()
+  }
+
+  async crawlUrl({
+    waitUntilDone = true,
+    timeoutMs = 30_000,
+    idempotencyKey,
+    ...params
+  }: {
+    url: string
+    waitUntilDone?: boolean
+    timeoutMs?: number
+    idempotencyKey?: string
+  } & firecrawl.Params) {
+    const res = await this.ky
+      .post('v0/crawl', {
+        json: params,
+        timeout: timeoutMs,
+        headers: idempotencyKey
+          ? {
+              'x-idempotency-key': idempotencyKey
+            }
+          : undefined
+      })
+      .json<firecrawl.CrawlResponse>()
+
+    assert(res.jobId)
+    if (waitUntilDone) {
+      return this.waitForCrawlJob({ jobId: res.jobId, timeoutMs })
+    }
+
+    return res
+  }
+
+  async checkCrawlStatus(jobId: string) {
+    assert(jobId)
+
+    return this.ky
+      .get(`v0/crawl/status/${jobId}`)
+      .json<firecrawl.JobStatusResponse>()
+  }
+
+  async waitForCrawlJob({
+    jobId,
+    timeoutMs = 30_000
+  }: {
+    jobId: string
+    timeoutMs?: number
+  }) {
+    assert(jobId)
+
+    const start = Date.now()
+    do {
+      const res = await this.checkCrawlStatus(jobId)
+      if (res.status === 'completed') {
+        return res
+      }
+
+      if (!['active', 'paused', 'pending', 'queued'].includes(res.status)) {
+        throw new Error(
+          `Crawl job "${jobId}" failed or was stopped. Status: ${res.status}`
+        )
+      }
+
+      if (Date.now() - start > timeoutMs) {
+        throw new Error(
+          `Timeout waiting for crawl job "${jobId}" to complete: ${res.status}`
+        )
+      }
+
+      await delay(1000)
+    } while (true)
+  }
+}
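A minimal usage sketch of the new FirecrawlClient (illustrative only, not part of the diff; the URL and schema are made up, and FIRECRAWL_API_KEY is assumed to be set):

// Sketch.
import z from 'zod'

const firecrawl = new FirecrawlClient() // reads FIRECRAWL_API_KEY

// scrapeUrl converts a Zod extraction schema to JSON Schema internally
// and defaults the extractor mode to 'llm-extraction'.
const scraped = await firecrawl.scrapeUrl({
  url: 'https://example.com',
  extractorOptions: {
    extractionSchema: z.object({ title: z.string(), summary: z.string() })
  }
})

// crawlUrl blocks by default, polling checkCrawlStatus every second
// until the job completes or the 30s default timeout elapses.
const crawl = await firecrawl.crawlUrl({ url: 'https://example.com' })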
@@ -2,6 +2,7 @@ export * from './clearbit-client.js'
 export * from './dexa-client.js'
 export * from './diffbot-client.js'
 export * from './exa-client.js'
+export * from './firecrawl-client.js'
 export * from './people-data-labs-client.js'
 export * from './perigon-client.js'
 export * from './predict-leads-client.js'
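With the barrel export added, the new client is importable alongside the existing ones. A sketch (the relative path is assumed and depends on where this index is consumed):

// Sketch (not part of the diff).
import { ExaClient, FirecrawlClient } from './services/index.js'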