Mirror of https://github.com/transitive-bullshit/chatgpt-api
pull/643/head^2
parent 2ea18abab2
commit ca31b560a8
@@ -23,6 +23,7 @@
 - dexa
 - diffbot
 - exa
+- firecrawl
 - people data labs
 - perigon
 - predict leads
@@ -43,11 +44,9 @@
 - agentic
 - walter
 - services
-- exa - need to update to correct format
 - wolfram alpha
 - wikipedia
 - midjourney
-- firecrawl
 - unstructured
 - pull from [langchain](https://github.com/langchain-ai/langchainjs/tree/main/langchain)
 - pull from other libs
@@ -156,24 +156,23 @@ export class ExaClient {
     })
   }

-  async search(query: string, options?: exa.RegularSearchOptions) {
-    return this.ky
-      .post('search', { json: { ...options, query } })
-      .json<exa.SearchResponse>()
+  /**
+   * Performs an Exa search for the given query.
+   */
+  async search(opts: { query: string } & exa.RegularSearchOptions) {
+    return this.ky.post('search', { json: opts }).json<exa.SearchResponse>()
   }

   /**
    * Performs a search with a Exa prompt-engineered query and returns the
    * contents of the documents.
-   *
-   * @param {string} query - The query string.
    */
-  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>(
-    query: string,
-    options?: exa.RegularSearchOptions & T
-  ) {
-    const { text, highlights, ...rest } = options || {}
-
+  async searchAndContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
+    query,
+    text,
+    highlights,
+    ...rest
+  }: { query: string } & exa.RegularSearchOptions & T) {
     return this.ky
       .post('search', {
         json: {
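The net effect of this hunk is that `ExaClient` call sites move from positional arguments to a single flat options object. A minimal sketch of the new call shape, assuming a default-constructed client and using `numResults` as an illustrative `RegularSearchOptions` field:

```ts
const exa = new ExaClient() // assumes the API key is picked up from the environment

// Before: exa.search('latest AI papers', { numResults: 5 })
// After: one object; `query` rides alongside the search options
const res = await exa.search({ query: 'latest AI papers', numResults: 5 })

// `searchAndContents` folds `text` / `highlights` into the same object;
// the method destructures them out before building the request body
const docs = await exa.searchAndContents({ query: 'latest AI papers', text: true })
```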
@@ -193,12 +192,10 @@ export class ExaClient {

   /**
    * Finds similar links to the provided URL.
-   *
-   * @param {string} url - The URL for which to find similar links.
    */
-  async findSimilar(url: string, options?: exa.FindSimilarOptions) {
+  async findSimilar(opts: { url: string } & exa.FindSimilarOptions) {
     return this.ky
-      .post('findSimilar', { json: { url, ...options } })
+      .post('findSimilar', { json: opts })
       .json<exa.SearchResponse>()
   }

@@ -210,9 +207,12 @@ export class ExaClient {
    */
   async findSimilarAndContents<
     T extends exa.ContentsOptions = exa.ContentsOptions
-  >(url: string, options?: exa.FindSimilarOptions & T) {
-    const { text, highlights, ...rest } = options || {}
-
+  >({
+    url,
+    text,
+    highlights,
+    ...rest
+  }: { url: string } & exa.FindSimilarOptions & T) {
     return this.ky
       .post('findSimilar', {
         json: {
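The similarity endpoints get the same treatment; a hedged sketch of the new call shape:

```ts
// Before: exa.findSimilar('https://example.com', { numResults: 3 })
const similar = await exa.findSimilar({
  url: 'https://example.com',
  numResults: 3 // illustrative FindSimilarOptions field
})

// As with searchAndContents, `text` / `highlights` are destructured out of
// the single options object before the request body is assembled
const similarDocs = await exa.findSimilarAndContents({
  url: 'https://example.com',
  highlights: true
})
```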
@@ -235,10 +235,10 @@ export class ExaClient {
    *
    * @param {string | string[] | SearchResult[]} ids - An array of document IDs.
    */
-  async getContents<T extends exa.ContentsOptions>(
-    ids: string | string[] | exa.SearchResult[],
-    options?: T
-  ) {
+  async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
+    ids,
+    ...opts
+  }: { ids: string | string[] | exa.SearchResult[] } & T) {
     let requestIds: string[]

     if (typeof ids === 'string') {
@@ -256,8 +256,8 @@ export class ExaClient {
     return this.ky
       .post('contents', {
         json: {
-          ids: requestIds,
-          ...options
+          ...opts,
+          ids: requestIds
         }
       })
       .json<exa.SearchResponse<T>>()
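Note the reordering in the request body: spreading `...opts` before `ids: requestIds` ensures the normalized ID list can no longer be clobbered by a stray `ids` key left in the remaining options. A usage sketch with made-up document IDs:

```ts
// Accepts a single ID, an array of IDs, or previous SearchResult objects
const contents = await exa.getContents({
  ids: ['doc-1', 'doc-2'], // hypothetical IDs
  text: true
})
```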
@@ -0,0 +1,205 @@
+import defaultKy, { type KyInstance } from 'ky'
+import z from 'zod'
+
+import { assert, delay, getEnv } from '../utils.js'
+import { zodToJsonSchema } from '../zod-to-json-schema.js'
+
+export namespace firecrawl {
+  /**
+   * Generic parameter interface.
+   */
+  export interface Params {
+    [key: string]: any
+    extractorOptions?: {
+      extractionSchema: z.ZodSchema | any
+      mode?: 'llm-extraction'
+      extractionPrompt?: string
+    }
+  }
+
+  /**
+   * Response interface for scraping operations.
+   */
+  export interface ScrapeResponse {
+    success: boolean
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for searching operations.
+   */
+  export interface SearchResponse {
+    success: boolean
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for crawling operations.
+   */
+  export interface CrawlResponse {
+    success: boolean
+    jobId?: string
+    data?: any
+    error?: string
+  }
+
+  /**
+   * Response interface for job status checks.
+   */
+  export interface JobStatusResponse {
+    success: boolean
+    status: string
+    jobId?: string
+    data?: any
+    error?: string
+  }
+}
+
+/**
+ * @see https://www.firecrawl.dev
+ */
+export class FirecrawlClient {
+  readonly ky: KyInstance
+  readonly apiKey: string
+  readonly apiBaseUrl: string
+
+  constructor({
+    apiKey = getEnv('FIRECRAWL_API_KEY'),
+    apiBaseUrl = getEnv('FIRECRAWL_API_BASE_URL') ??
+      'https://api.firecrawl.dev',
+    ky = defaultKy
+  }: {
+    apiKey?: string
+    apiBaseUrl?: string
+    ky?: KyInstance
+  } = {}) {
+    assert(
+      apiKey,
+      'FirecrawlClient missing required "apiKey" (defaults to "FIRECRAWL_API_KEY")'
+    )
+    assert(
+      apiBaseUrl,
+      'FirecrawlClient missing required "apiBaseUrl" (defaults to "FIRECRAWL_API_BASE_URL")'
+    )
+
+    this.apiKey = apiKey
+    this.apiBaseUrl = apiBaseUrl
+
+    this.ky = ky.extend({
+      prefixUrl: apiBaseUrl,
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`
+      }
+    })
+  }
+
+  async scrapeUrl(
+    opts: {
+      url: string
+    } & firecrawl.Params
+  ) {
+    const json = {
+      ...opts
+    }
+
+    if (opts?.extractorOptions?.extractionSchema) {
+      let schema = opts.extractorOptions.extractionSchema
+      if (schema instanceof z.ZodSchema) {
+        schema = zodToJsonSchema(schema)
+      }
+
+      json.extractorOptions = {
+        mode: 'llm-extraction',
+        ...opts.extractorOptions,
+        extractionSchema: schema
+      }
+    }
+
+    return this.ky
+      .post('v0/scrapeUrl', { json })
+      .json<firecrawl.ScrapeResponse>()
+  }
+
+  async search(
+    opts: {
+      query: string
+    } & firecrawl.Params
+  ) {
+    return this.ky
+      .post('v0/search', { json: opts })
+      .json<firecrawl.SearchResponse>()
+  }
+
+  async crawlUrl({
+    waitUntilDone = true,
+    timeoutMs = 30_000,
+    idempotencyKey,
+    ...params
+  }: {
+    url: string
+    waitUntilDone?: boolean
+    timeoutMs?: number
+    idempotencyKey?: string
+  } & firecrawl.Params) {
+    const res = await this.ky
+      .post('v0/crawl', {
+        json: params,
+        timeout: timeoutMs,
+        headers: idempotencyKey
+          ? {
+              'x-idempotency-key': idempotencyKey
+            }
+          : undefined
+      })
+      .json<firecrawl.CrawlResponse>()
+
+    assert(res.jobId)
+    if (waitUntilDone) {
+      return this.waitForCrawlJob({ jobId: res.jobId, timeoutMs })
+    }
+
+    return res
+  }
+
+  async checkCrawlStatus(jobId: string) {
+    assert(jobId)
+
+    return this.ky
+      .get(`v0/crawl/status/${jobId}`)
+      .json<firecrawl.JobStatusResponse>()
+  }
+
+  async waitForCrawlJob({
+    jobId,
+    timeoutMs = 30_000
+  }: {
+    jobId: string
+    timeoutMs?: number
+  }) {
+    assert(jobId)
+
+    const start = Date.now()
+    do {
+      const res = await this.checkCrawlStatus(jobId)
+      if (res.status === 'completed') {
+        return res
+      }
+
+      if (!['active', 'paused', 'pending', 'queued'].includes(res.status)) {
+        throw new Error(
+          `Crawl job "${jobId}" failed or was stopped. Status: ${res.status}`
+        )
+      }
+
+      if (Date.now() - start > timeoutMs) {
+        throw new Error(
+          `Timeout waiting for crawl job "${jobId}" to complete: ${res.status}`
+        )
+      }
+
+      await delay(1000)
+    } while (true)
+  }
+}
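A hedged usage sketch for the new `FirecrawlClient`: when a Zod schema is passed as `extractionSchema`, `scrapeUrl` converts it to JSON Schema via `zodToJsonSchema` and defaults `mode` to `'llm-extraction'`. The schema fields below are invented for illustration:

```ts
import z from 'zod'

const firecrawl = new FirecrawlClient() // reads FIRECRAWL_API_KEY from the environment

const res = await firecrawl.scrapeUrl({
  url: 'https://example.com',
  extractorOptions: {
    extractionSchema: z.object({
      title: z.string(),
      summary: z.string()
    })
  }
})

if (res.success) {
  console.log(res.data)
}
```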
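Crawling is asynchronous on Firecrawl's side, so `crawlUrl` defaults to polling: `waitForCrawlJob` re-checks the job status once per second until it reports `completed`, throws on a terminal failure status, and also throws once `timeoutMs` elapses. A sketch of both modes, with an illustrative URL and timeout:

```ts
// Blocking (default): resolves once the crawl job reports 'completed'
const crawl = await firecrawl.crawlUrl({
  url: 'https://example.com',
  timeoutMs: 60_000
})

// Fire-and-forget: grab the jobId and poll manually
const job = await firecrawl.crawlUrl({
  url: 'https://example.com',
  waitUntilDone: false
})
if (job.jobId) {
  const status = await firecrawl.checkCrawlStatus(job.jobId)
  console.log(status.status, status.data)
}
```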
@@ -2,6 +2,7 @@ export * from './clearbit-client.js'
 export * from './dexa-client.js'
 export * from './diffbot-client.js'
 export * from './exa-client.js'
+export * from './firecrawl-client.js'
 export * from './people-data-labs-client.js'
 export * from './perigon-client.js'
 export * from './predict-leads-client.js'
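With the barrel export in place, downstream code can pull the new client from the services index; a minimal sketch (the relative import path is an assumption):

```ts
import { FirecrawlClient } from './services/index.js' // path is an assumption

const client = new FirecrawlClient({ apiKey: process.env.FIRECRAWL_API_KEY })
```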