kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
Merge pull request #696 from ftonato/refactor/update-firecrawl-api-to-v1
feat: update Firecrawl API versionpull/699/head
commit
cfc1210e21
|
@ -2,29 +2,24 @@
|
|||
"name": "@agentic/firecrawl",
|
||||
"version": "7.3.5",
|
||||
"description": "Agentic SDK for Firecrawl.",
|
||||
"author": "Travis Fischer <travis@transitivebullsh.it>",
|
||||
"authors": [
|
||||
"Travis Fischer <travis@transitivebullsh.it>",
|
||||
"Ademílson Tonato <ademilsonft@outlook.com>"
|
||||
],
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/transitive-bullshit/agentic.git"
|
||||
},
|
||||
"type": "module",
|
||||
"source": "./src/index.ts",
|
||||
"types": "./dist/index.d.ts",
|
||||
"sideEffects": false,
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js",
|
||||
"default": "./dist/index.js"
|
||||
}
|
||||
},
|
||||
"main": "./dist/index.js",
|
||||
"module": "./dist/index.mjs",
|
||||
"types": "./dist/index.d.ts",
|
||||
"files": [
|
||||
"dist"
|
||||
"dist/**"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsup --config ../../tsup.config.ts",
|
||||
"dev": "tsup --config ../../tsup.config.ts --watch",
|
||||
"build": "tsup",
|
||||
"clean": "del dist",
|
||||
"test": "run-s test:*",
|
||||
"test:lint": "eslint .",
|
||||
|
@ -39,7 +34,10 @@
|
|||
"zod": "^3.24.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@agentic/tsconfig": "workspace:*"
|
||||
"@agentic/tsconfig": "workspace:*",
|
||||
"@types/node": "^20.11.16",
|
||||
"tsup": "^8.0.1",
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
|
|
|
@ -1,16 +1,396 @@
|
|||
// import type * as z from 'zod'
|
||||
import {
|
||||
aiFunction,
|
||||
AIFunctionsProvider,
|
||||
assert,
|
||||
delay,
|
||||
getEnv,
|
||||
isZodSchema,
|
||||
throttleKy,
|
||||
zodToJsonSchema
|
||||
} from '@agentic/core'
|
||||
import defaultKy, { type KyInstance } from 'ky'
|
||||
import pThrottle from 'p-throttle'
|
||||
import { z } from 'zod'
|
||||
import { type z } from 'zod'
|
||||
|
||||
/**
 * Configuration interface for FirecrawlClient.
 *
 * Both fields are optional at the type level.
 */
export interface FirecrawlClientConfig {
  /** Firecrawl API key. */
  apiKey?: string
  /** Base URL of the Firecrawl API. */
  apiBaseUrl?: string
}
|
||||
|
||||
/**
 * Metadata for a Firecrawl document.
 *
 * Every named field is optional; the index signature additionally admits
 * arbitrary extra keys of any type.
 */
export interface FirecrawlDocumentMetadata {
  title?: string
  description?: string
  language?: string
  keywords?: string
  robots?: string
  ogTitle?: string
  ogDescription?: string
  ogUrl?: string
  ogImage?: string
  ogAudio?: string
  ogDeterminer?: string
  ogLocale?: string
  ogLocaleAlternate?: string[]
  ogSiteName?: string
  ogVideo?: string
  dctermsCreated?: string
  dcDateCreated?: string
  dcDate?: string
  dctermsType?: string
  dcType?: string
  dctermsAudience?: string
  dctermsSubject?: string
  dcSubject?: string
  dcDescription?: string
  dctermsKeywords?: string
  modifiedTime?: string
  publishedTime?: string
  articleTag?: string
  articleSection?: string
  sourceURL?: string
  statusCode?: number
  error?: string
  // Open-ended: keys not listed above are accepted with any value type.
  [key: string]: any
}
|
||||
|
||||
/**
 * Document interface for Firecrawl.
 *
 * @typeParam T - Type of the `extract` / `json` payloads.
 * @typeParam ActionsSchema - Type of `actions`; defaults to `never`.
 */
export interface FirecrawlDocument<
  T = any,
  ActionsSchema extends ActionsResult | never = never
> {
  url?: string
  markdown?: string
  html?: string
  rawHtml?: string
  links?: string[]
  extract?: T
  json?: T
  screenshot?: string
  metadata?: FirecrawlDocumentMetadata
  // The only non-optional member; with the default type argument it is `never`.
  actions: ActionsSchema
  title?: string
  description?: string
}
|
||||
|
||||
/**
 * Parameters for scraping operations.
 * Defines the options and configurations available for scraping web content.
 *
 * All fields are optional.
 */
export interface ScrapeOptions {
  /** Output formats requested for the scraped page. */
  formats?: (
    | 'markdown'
    | 'html'
    | 'rawHtml'
    | 'content'
    | 'links'
    | 'screenshot'
    | 'screenshot@fullPage'
    | 'extract'
    | 'json'
  )[]
  headers?: Record<string, string>
  includeTags?: string[]
  excludeTags?: string[]
  onlyMainContent?: boolean
  waitFor?: number
  timeout?: number
  location?: {
    country?: string
    languages?: string[]
  }
  mobile?: boolean
  skipTlsVerification?: boolean
  removeBase64Images?: boolean
  blockAds?: boolean
  proxy?: 'basic' | 'stealth'
}
|
||||
|
||||
/**
 * Parameters for scraping operations.
 *
 * Carries the same option fields as `ScrapeOptions` plus the LLM extraction
 * settings (`extract`, `jsonOptions`) and browser `actions`.
 *
 * @typeParam LLMSchema - Schema type used by `extract.schema` and `jsonOptions.schema`.
 * @typeParam ActionsSchema - Type of the `actions` list; defaults to `undefined`.
 */
export interface ScrapeParams<
  LLMSchema extends z.ZodSchema = any,
  ActionsSchema extends Action[] | undefined = undefined
> {
  formats?: (
    | 'markdown'
    | 'html'
    | 'rawHtml'
    | 'content'
    | 'links'
    | 'screenshot'
    | 'screenshot@fullPage'
    | 'extract'
    | 'json'
  )[]
  headers?: Record<string, string>
  includeTags?: string[]
  excludeTags?: string[]
  onlyMainContent?: boolean
  waitFor?: number
  timeout?: number
  location?: {
    country?: string
    languages?: string[]
  }
  mobile?: boolean
  skipTlsVerification?: boolean
  removeBase64Images?: boolean
  blockAds?: boolean
  proxy?: 'basic' | 'stealth'
  extract?: {
    prompt?: string
    schema?: LLMSchema
    systemPrompt?: string
  }
  jsonOptions?: {
    prompt?: string
    schema?: LLMSchema
    systemPrompt?: string
  }
  actions?: ActionsSchema
}
|
||||
|
||||
/**
 * A single page action, discriminated by its `type` tag.
 */
export type Action =
  | {
      type: 'wait'
      milliseconds?: number
      selector?: string
    }
  | {
      type: 'click'
      selector: string
    }
  | {
      type: 'screenshot'
      fullPage?: boolean
    }
  | {
      type: 'write'
      text: string
    }
  | {
      type: 'press'
      key: string
    }
  | {
      type: 'scroll'
      direction?: 'up' | 'down'
      selector?: string
    }
  | {
      type: 'scrape'
    }
  | {
      type: 'executeJavascript'
      script: string
    }
|
||||
|
||||
/**
 * Result of executed actions: a list of screenshot strings.
 */
export interface ActionsResult {
  screenshots: string[]
}
|
||||
|
||||
/**
 * Response interface for scraping operations.
 *
 * Extends `FirecrawlDocument` with a literal `success: true` tag plus optional
 * `warning` / `error` strings.
 */
export interface ScrapeResponse<
  LLMResult = any,
  ActionsSchema extends ActionsResult | never = never
> extends FirecrawlDocument<LLMResult, ActionsSchema> {
  success: true
  warning?: string
  error?: string
}
|
||||
|
||||
/**
 * Parameters for search operations.
 *
 * All fields are optional.
 */
export interface SearchParams {
  limit?: number
  tbs?: string
  filter?: string
  lang?: string
  country?: string
  location?: string
  origin?: string
  timeout?: number
  scrapeOptions?: ScrapeParams
}
|
||||
|
||||
/**
 * Response interface for search operations.
 */
export interface SearchResponse {
  success: boolean
  data: FirecrawlDocument<undefined>[]
  warning?: string
  error?: string
}
|
||||
|
||||
/**
 * Parameters for crawling operations.
 *
 * All fields are optional.
 */
export interface CrawlParams {
  includePaths?: string[]
  excludePaths?: string[]
  maxDepth?: number
  maxDiscoveryDepth?: number
  limit?: number
  allowBackwardLinks?: boolean
  allowExternalLinks?: boolean
  ignoreSitemap?: boolean
  scrapeOptions?: ScrapeParams
  /** Either a bare webhook URL or an object describing the webhook. */
  webhook?:
    | string
    | {
        url: string
        headers?: Record<string, string>
        metadata?: Record<string, string>
        // Same type as the original indexed-tuple spelling, written as a plain union.
        events?: ('completed' | 'failed' | 'page' | 'started')[]
      }
  deduplicateSimilarURLs?: boolean
  ignoreQueryParameters?: boolean
  regexOnFullURL?: boolean
}
|
||||
|
||||
/**
 * Response interface for crawling operations.
 *
 * `success` is the literal `true`; failures are modelled by `ErrorResponse`.
 */
export interface CrawlResponse {
  id?: string
  url?: string
  success: true
  error?: string
}
|
||||
|
||||
/**
 * Response interface for job status checks.
 */
export interface CrawlStatusResponse {
  success: true
  status: 'scraping' | 'completed' | 'failed' | 'cancelled'
  completed: number
  total: number
  creditsUsed: number
  // NOTE(review): declared as Date, but checkCrawlStatus returns the parsed JSON
  // body unchanged and no Date conversion is visible in this file — confirm
  // whether callers actually receive a string here.
  expiresAt: Date
  next?: string
  data: FirecrawlDocument<undefined>[]
}
|
||||
|
||||
/**
 * Response interface for crawl errors.
 */
export interface CrawlErrorsResponse {
  /** One entry per recorded error. */
  errors: {
    id: string
    timestamp?: string
    url: string
    error: string
  }[]
  robotsBlocked: string[]
}
|
||||
|
||||
/**
 * Error response interface.
 *
 * Tagged with the literal `success: false`.
 */
export interface ErrorResponse {
  success: false
  error: string
}
|
||||
|
||||
/**
 * Custom error class for Firecrawl.
 *
 * Carries the status code associated with the failure and an optional
 * `details` payload alongside the usual `message`.
 */
export class FirecrawlError extends Error {
  /** Status code supplied by the thrower. */
  statusCode: number
  /** Optional extra error details supplied by the thrower. */
  details?: any

  /**
   * @param message - Human-readable error message.
   * @param statusCode - Status code to attach to the error.
   * @param details - Optional extra details to attach to the error.
   */
  constructor(message: string, statusCode: number, details?: any) {
    super(message)
    // Restore the prototype chain so `instanceof FirecrawlError` keeps working
    // when the class is down-levelled to ES5-style constructor functions.
    Object.setPrototypeOf(this, new.target.prototype)
    // Without this, `name` is inherited as "Error" in logs and stack traces.
    this.name = 'FirecrawlError'
    this.statusCode = statusCode
    this.details = details
  }
}
|
||||
|
||||
/**
 * Parameters for extracting information from URLs.
 *
 * `prompt` is the only required field.
 *
 * @typeParam T - Schema type for `schema`.
 */
export interface ExtractParams<T extends z.ZodSchema = any> {
  prompt: string
  schema?: T
  enableWebSearch?: boolean
  ignoreSitemap?: boolean
  includeSubdomains?: boolean
  showSources?: boolean
  scrapeOptions?: ScrapeOptions
}
|
||||
|
||||
/**
 * Response interface for extracting information from URLs.
 * Defines the structure of the response received after extracting information from URLs.
 *
 * @typeParam T - Type of the `data` payload.
 */
export interface ExtractResponse<T = any> {
  success: boolean
  id?: string
  data: T
  error?: string
  warning?: string
  sources?: string[]
}
|
||||
|
||||
/**
 * Response interface for extract status operations.
 *
 * @typeParam T - Type of the optional `data` payload.
 */
export interface ExtractStatusResponse<T = any> {
  success: boolean
  status: 'processing' | 'completed' | 'failed'
  data?: T
  error?: string
  expiresAt?: string
}

|
||||
/**
 * Parameters for LLMs.txt generation operations.
 */
export interface GenerateLLMsTextParams {
  /**
   * Maximum number of URLs to process (1-100)
   * @default 10
   */
  maxUrls?: number
  /**
   * Whether to show the full LLMs-full.txt in the response
   * @default false
   */
  showFullText?: boolean
}
|
||||
|
||||
/**
 * Response interface for LLMs.txt generation operations.
 */
export interface GenerateLLMsTextResponse {
  success: boolean
  id: string
}
|
||||
|
||||
/**
 * Status response interface for LLMs.txt generation operations.
 */
export interface GenerateLLMsTextStatusResponse {
  success: boolean
  data: {
    llmstxt: string
    llmsfulltxt?: string
  }
  status: 'processing' | 'completed' | 'failed'
  error?: string
  expiresAt: string
}
|
||||
|
||||
export namespace firecrawl {
|
||||
export const BASE_URL = 'https://api.firecrawl.dev'
|
||||
|
@ -21,79 +401,6 @@ export namespace firecrawl {
|
|||
interval: 1200,
|
||||
strict: true
|
||||
})
|
||||
|
||||
/**
|
||||
* Generic parameter interface.
|
||||
*/
|
||||
export interface Params {
|
||||
extractorOptions?: {
|
||||
extractionSchema: z.ZodSchema | any
|
||||
mode?: 'llm-extraction'
|
||||
extractionPrompt?: string
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for scraping operations.
|
||||
*/
|
||||
export interface ScrapeResponse {
|
||||
success: boolean
|
||||
data?: Data
|
||||
error?: string
|
||||
}
|
||||
|
||||
export interface Data {
|
||||
content?: string
|
||||
markdown?: string
|
||||
html?: string
|
||||
metadata: Metadata
|
||||
}
|
||||
|
||||
export interface Metadata {
|
||||
title: string
|
||||
description: string
|
||||
keywords?: string
|
||||
robots?: string
|
||||
ogTitle?: string
|
||||
ogDescription?: string
|
||||
ogUrl?: string
|
||||
ogImage?: string
|
||||
ogLocaleAlternate?: any[]
|
||||
ogSiteName?: string
|
||||
sourceURL?: string
|
||||
modifiedTime?: string
|
||||
publishedTime?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for searching operations.
|
||||
*/
|
||||
export interface SearchResponse {
|
||||
success: boolean
|
||||
data?: any
|
||||
error?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for crawling operations.
|
||||
*/
|
||||
export interface CrawlResponse {
|
||||
success: boolean
|
||||
jobId?: string
|
||||
data?: any
|
||||
error?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for job status checks.
|
||||
*/
|
||||
export interface JobStatusResponse {
|
||||
success: boolean
|
||||
status: string
|
||||
jobId?: string
|
||||
data?: any
|
||||
error?: string
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -140,124 +447,363 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
|||
prefixUrl: apiBaseUrl,
|
||||
timeout: timeoutMs,
|
||||
headers: {
|
||||
Authorization: `Bearer ${this.apiKey}`
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
'X-Origin': 'agentic',
|
||||
'X-Origin-Type': 'integration'
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrape the contents of a URL.
|
||||
* Sends a POST request.
|
||||
*/
|
||||
@aiFunction({
|
||||
name: 'firecrawl_scrape_url',
|
||||
description: 'Scrape the contents of a URL.',
|
||||
inputSchema: z.object({
|
||||
url: z.string().url().describe('The URL to scrape.')
|
||||
})
|
||||
})
|
||||
async scrapeUrl(
|
||||
opts: {
|
||||
url: string
|
||||
} & firecrawl.Params
|
||||
) {
|
||||
const json = {
|
||||
...opts
|
||||
/**
 * Sends a POST request with `data` as the JSON body and returns the parsed
 * JSON response.
 *
 * @param path - Request path passed to the ky instance.
 * @param data - Value sent as the JSON request body.
 * @returns The parsed JSON response body.
 * @throws FirecrawlError when the failed request's response body is JSON with
 *   an `error` field; the error carries the response status (500 if absent)
 *   and the body's `details`. Any other failure is rethrown unchanged.
 */
private async postRequest(path: string, data: any): Promise<any> {
  try {
    const response = await this.ky.post(path, { json: data })
    return await response.json()
  } catch (err) {
    if (err instanceof Error) {
      const failedResponse = (err as any).response
      let body: any
      try {
        body = await failedResponse?.json()
      } catch {
        // The error body could not be read as JSON. Fall through and rethrow
        // the original error instead of replacing it with a parse failure.
      }
      if (body?.error) {
        throw new FirecrawlError(
          `Request failed. Error: ${body.error}`,
          failedResponse?.status ?? 500,
          body?.details
        )
      }
    }
    throw err
  }
}
|
||||
|
||||
if (opts?.extractorOptions?.extractionSchema) {
|
||||
let schema = opts.extractorOptions.extractionSchema
|
||||
if (isZodSchema(schema)) {
|
||||
/**
 * Sends a GET request and returns the parsed JSON response.
 *
 * @param path - Request path passed to the ky instance.
 * @returns The parsed JSON response body.
 * @throws FirecrawlError when the failed request's response body is JSON with
 *   an `error` field; the error carries the response status (500 if absent)
 *   and the body's `details`. Any other failure is rethrown unchanged.
 */
private async getRequest(path: string): Promise<any> {
  try {
    const response = await this.ky.get(path)
    return await response.json()
  } catch (err) {
    if (err instanceof Error) {
      const failedResponse = (err as any).response
      let body: any
      try {
        body = await failedResponse?.json()
      } catch {
        // The error body could not be read as JSON. Fall through and rethrow
        // the original error instead of replacing it with a parse failure.
      }
      if (body?.error) {
        throw new FirecrawlError(
          `Request failed. Error: ${body.error}`,
          failedResponse?.status ?? 500,
          body?.details
        )
      }
    }
    throw err
  }
}
|
||||
|
||||
/**
 * Sends a DELETE request and returns the parsed JSON response.
 *
 * @param path - Request path passed to the ky instance.
 * @returns The parsed JSON response body.
 * @throws FirecrawlError when the failed request's response body is JSON with
 *   an `error` field; the error carries the response status (500 if absent)
 *   and the body's `details`. Any other failure is rethrown unchanged.
 */
private async deleteRequest(path: string): Promise<any> {
  try {
    const response = await this.ky.delete(path)
    return await response.json()
  } catch (err) {
    if (err instanceof Error) {
      const failedResponse = (err as any).response
      let body: any
      try {
        body = await failedResponse?.json()
      } catch {
        // The error body could not be read as JSON. Fall through and rethrow
        // the original error instead of replacing it with a parse failure.
      }
      if (body?.error) {
        throw new FirecrawlError(
          `Request failed. Error: ${body.error}`,
          failedResponse?.status ?? 500,
          body?.details
        )
      }
    }
    throw err
  }
}
|
||||
|
||||
/**
 * Scrapes a URL using the Firecrawl API (`POST v1/scrape`).
 *
 * The request body is `{ url, ...params }`. When `extract.schema` or
 * `jsonOptions.schema` is set, it is passed through `zodToJsonSchema` first.
 *
 * @param url - URL to scrape.
 * @param params - Optional scrape parameters merged into the request body.
 * @returns The parsed response body, returned as-is.
 * @throws FirecrawlError - errors of that type are rethrown unchanged; any
 *   other failure is wrapped in a FirecrawlError with status 500.
 */
async scrapeUrl<
  T extends z.ZodSchema,
  ActionsSchema extends Action[] | undefined = undefined
>(
  url: string,
  params?: ScrapeParams<T, ActionsSchema>
): Promise<
  | ScrapeResponse<
      z.infer<T>,
      ActionsSchema extends Action[] ? ActionsResult : never
    >
  | ErrorResponse
> {
  let jsonData: any = { url, ...params }

  // Both option groups carry a `schema` that gets the same treatment.
  for (const key of ['extract', 'jsonOptions'] as const) {
    if (!jsonData?.[key]?.schema) {
      continue
    }

    let schema = jsonData[key].schema
    try {
      schema = zodToJsonSchema(schema)
    } catch {
      // Deliberate best-effort: if conversion throws, the schema is sent
      // unchanged (presumably it is already a JSON schema — confirm).
    }

    jsonData = {
      ...jsonData,
      [key]: {
        ...jsonData[key],
        schema
      }
    }
  }

  try {
    const response = await this.postRequest('v1/scrape', jsonData)
    return response
  } catch (err) {
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
async checkCrawlStatus(jobId: string) {
|
||||
assert(jobId)
|
||||
/**
 * Searches using the Firecrawl API (`POST v1/search`).
 *
 * Defaults applied when `params` omits them: `limit` 5, `lang` 'en',
 * `country` 'us', `origin` 'api', `timeout` 60_000 and
 * `scrapeOptions` `{ formats: [] }`.
 *
 * @param query - Search query.
 * @param params - Optional search parameters.
 * @returns `{ success: true, data, warning }` built from the response body.
 * @throws FirecrawlError - errors of that type (including the one raised when
 *   the body reports `success` falsy) are rethrown unchanged, so their status
 *   code and details survive; any other failure is wrapped with status 500.
 */
async search(query: string, params?: SearchParams): Promise<SearchResponse> {
  const jsonData = {
    query,
    limit: params?.limit ?? 5,
    tbs: params?.tbs,
    filter: params?.filter,
    lang: params?.lang ?? 'en',
    country: params?.country ?? 'us',
    location: params?.location,
    origin: params?.origin ?? 'api',
    timeout: params?.timeout ?? 60_000,
    scrapeOptions: params?.scrapeOptions ?? { formats: [] }
  }

  try {
    const response = await this.postRequest('v1/search', jsonData)
    if (!response.success) {
      throw new FirecrawlError(
        `Failed to search. Error: ${response.error}`,
        500
      )
    }

    return {
      success: true,
      data: response.data as FirecrawlDocument<any>[],
      warning: response.warning
    }
  } catch (err) {
    // Keep the original status code/details instead of flattening to 500.
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
async waitForCrawlJob({
|
||||
jobId,
|
||||
timeoutMs = 60_000
|
||||
}: {
|
||||
jobId: string
|
||||
timeoutMs?: number
|
||||
}) {
|
||||
assert(jobId)
|
||||
/**
 * Initiates a crawl job for a URL (`POST v1/crawl`).
 *
 * The request body is `{ url, ...params }`.
 *
 * @param url - URL to crawl.
 * @param params - Optional crawl parameters merged into the request body.
 * @returns The parsed response body when it reports `success`.
 * @throws FirecrawlError - errors of that type (including the one raised when
 *   the body reports `success` falsy) are rethrown unchanged, so their status
 *   code and details survive; any other failure is wrapped with status 500.
 */
async crawlUrl(
  url: string,
  params?: CrawlParams
): Promise<CrawlResponse | ErrorResponse> {
  const jsonData = { url, ...params }

  try {
    const response = await this.postRequest('v1/crawl', jsonData)
    if (!response.success) {
      throw new FirecrawlError(
        `Failed to start crawl job. Error: ${response.error}`,
        500
      )
    }
    return response
  } catch (err) {
    // Keep the original status code/details instead of flattening to 500.
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
if (Date.now() - start > timeoutMs) {
|
||||
throw new Error(
|
||||
`Timeout waiting for crawl job "${jobId}" to complete: ${res.status}`
|
||||
/**
 * Checks the status of a crawl job (`GET v1/crawl/{id}`).
 *
 * @param id - Crawl job ID.
 * @returns The parsed response body when it reports `success`.
 * @throws FirecrawlError with status 400 when `id` is empty. Other
 *   FirecrawlErrors are rethrown unchanged, so their status code and details
 *   survive; any other failure is wrapped with status 500.
 */
async checkCrawlStatus(
  id: string
): Promise<CrawlStatusResponse | ErrorResponse> {
  if (!id) {
    throw new FirecrawlError('No crawl ID provided', 400)
  }

  try {
    const response = await this.getRequest(`v1/crawl/${id}`)
    if (!response.success) {
      throw new FirecrawlError(
        `Failed to check crawl status. Error: ${response.error}`,
        500
      )
    }
    return response
  } catch (err) {
    // Keep the original status code/details instead of flattening to 500.
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
await delay(1000)
|
||||
} while (true)
|
||||
/**
 * Returns information about crawl errors (`GET v1/crawl/{id}/errors`).
 *
 * @param id - Crawl job ID.
 * @returns The parsed response body when it contains an `errors` field.
 * @throws FirecrawlError with status 400 when `id` is empty. Other
 *   FirecrawlErrors are rethrown unchanged, so their status code and details
 *   survive; any other failure is wrapped with status 500.
 */
async checkCrawlErrors(
  id: string
): Promise<CrawlErrorsResponse | ErrorResponse> {
  // Same guard as the other ID-based status methods in this client.
  if (!id) {
    throw new FirecrawlError('No crawl ID provided', 400)
  }

  try {
    const response = await this.getRequest(`v1/crawl/${id}/errors`)
    if (!response.errors) {
      throw new FirecrawlError(
        `Failed to check crawl errors. Error: ${response.error}`,
        500
      )
    }
    return response
  } catch (err) {
    // Keep the original status code/details instead of flattening to 500.
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
/**
 * Cancels a crawl job (`DELETE v1/crawl/{id}`).
 *
 * NOTE(review): the declared return type is `ErrorResponse`, yet the body is
 * returned only when it has a truthy `status` field — the type looks
 * inaccurate; confirm against the API before relying on it.
 *
 * @param id - Crawl job ID.
 * @returns The parsed response body when it contains a truthy `status`.
 * @throws FirecrawlError with status 400 when `id` is empty. Other
 *   FirecrawlErrors are rethrown unchanged, so their status code and details
 *   survive; any other failure is wrapped with status 500.
 */
async cancelCrawl(id: string): Promise<ErrorResponse> {
  // Without this guard an empty id would issue `DELETE v1/crawl/`.
  if (!id) {
    throw new FirecrawlError('No crawl ID provided', 400)
  }

  try {
    const response = await this.deleteRequest(`v1/crawl/${id}`)
    if (!response.status) {
      throw new FirecrawlError(
        `Failed to cancel crawl job. Error: ${response.error}`,
        500
      )
    }
    return response
  } catch (err) {
    // Keep the original status code/details instead of flattening to 500.
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
/**
 * Extracts structured data from URLs using LLMs (`POST v1/extract`).
 *
 * The request body is `{ urls, ...params }` with `schema` replaced by
 * `zodToJsonSchema(params.schema)` when a schema is given, else `undefined`.
 *
 * @param urls - Array of URLs to extract data from
 * @param params - Additional parameters for the extract request
 * @returns The response from the extract operation
 * @throws FirecrawlError - errors of that type (including the one raised when
 *   the body reports `success` falsy) are rethrown unchanged; any other
 *   failure is wrapped in a FirecrawlError with status 500.
 */
async extract<T extends z.ZodSchema>(
  urls: string[],
  params: ExtractParams<T>
): Promise<ExtractResponse<z.infer<T>>> {
  const jsonData = {
    urls,
    ...params,
    schema: params.schema ? zodToJsonSchema(params.schema) : undefined
  }

  try {
    const response = await this.postRequest('v1/extract', jsonData)
    if (!response.success) {
      throw new FirecrawlError(
        response.error || 'Extract operation failed',
        500
      )
    }
    return response
  } catch (err) {
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
/**
 * Checks the status of an extract operation (`GET v1/extract/{id}`).
 *
 * @param id - Extract job ID.
 * @returns The parsed response body, returned as-is.
 * @throws FirecrawlError with status 400 when `id` is empty. Other
 *   FirecrawlErrors are rethrown unchanged; any other failure is wrapped in a
 *   FirecrawlError with status 500.
 */
async checkExtractStatus<T = any>(
  id: string
): Promise<ExtractStatusResponse<T>> {
  if (!id) {
    throw new FirecrawlError('No extract ID provided', 400)
  }

  try {
    const response = await this.getRequest(`v1/extract/${id}`)
    return response
  } catch (err) {
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
|
||||
/**
 * Generates LLMs.txt for a given URL (`POST v1/llmstxt`).
 *
 * The request body is `{ url, ...params }`.
 *
 * NOTE(review): the declared return type is the *status* response shape, while
 * this file also defines `GenerateLLMsTextResponse` (`{ success, id }`) and the
 * body is returned unchanged — confirm which shape the endpoint really returns.
 *
 * @param url - URL to generate LLMs.txt for.
 * @param params - Generation parameters merged into the request body.
 * @returns The parsed response body, returned as-is.
 * @throws FirecrawlError - errors of that type are rethrown unchanged; any
 *   other failure is wrapped in a FirecrawlError with status 500.
 */
async generateLLMsText(
  url: string,
  params: GenerateLLMsTextParams
): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  const jsonData = {
    url,
    ...params
  }

  try {
    const response = await this.postRequest('v1/llmstxt', jsonData)
    return response
  } catch (err) {
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
|
||||
}
|
||||
|
|
|
@ -1 +1,13 @@
|
|||
export * from './firecrawl-client'
|
||||
export type {
|
||||
ErrorResponse,
|
||||
ExtractParams,
|
||||
ExtractResponse,
|
||||
ExtractStatusResponse,
|
||||
FirecrawlClientConfig,
|
||||
GenerateLLMsTextParams,
|
||||
GenerateLLMsTextResponse,
|
||||
GenerateLLMsTextStatusResponse,
|
||||
ScrapeParams,
|
||||
ScrapeResponse
|
||||
} from './firecrawl-client.js'
|
||||
export { FirecrawlClient } from './firecrawl-client.js'
|
||||
|
|
|
@ -0,0 +1,194 @@
|
|||
import { z } from 'zod'
|
||||
|
||||
import { FirecrawlClient } from './dist/index.mjs'
|
||||
|
||||
// Initialize the client with the API key.
// Prefer the FIRECRAWL_API_KEY environment variable so a real key never has to
// be written into this file; the placeholder below is only a fallback.
const apiKey = process.env.FIRECRAWL_API_KEY ?? 'FIRECRAWL-API-KEY'
const firecrawl = new FirecrawlClient({ apiKey })
|
||||
|
||||
// =============================================
|
||||
// Test 1: URL Scraping
|
||||
// =============================================
|
||||
/**
 * Scrapes a single URL and logs the result; failures are logged, not thrown.
 */
async function testUrlScraping() {
  console.log('🔍 Testing URL scraping...')
  try {
    const result = await firecrawl.scrapeUrl('https://mairistumpf.com')
    console.log('✅ URL scraping successful!')
    console.log('Result:', result)
  } catch (err) {
    console.error('❌ URL scraping failed:', err)
  }
}
|
||||
|
||||
// =============================================
|
||||
// Test 2: Search
|
||||
// =============================================
|
||||
/**
 * Runs one search and logs the returned documents and how many there were;
 * failures are logged, not thrown.
 */
async function testSearch() {
  console.log('\n🔍 Testing search...')
  try {
    const result = await firecrawl.search('artificial intelligence news', {
      limit: 5,
      lang: 'en',
      country: 'us'
    })
    console.log('✅ Search successful!')
    console.log('Results:', result.data)
    // Distinct label: the original printed the count under 'Results:' as well.
    console.log('Result count:', result.data.length)
  } catch (err) {
    console.error('❌ Search failed:', err)
  }
}
|
||||
|
||||
// =============================================
|
||||
// Test 3: Crawl URL
|
||||
// =============================================
|
||||
/**
 * Starts a crawl and, when it returns an id, checks its status, fetches its
 * errors and cancels it, logging each result. Failures are logged, not thrown.
 */
async function testCrawlUrl() {
  console.log('\n🔍 Testing URL crawling...')
  try {
    const result = await firecrawl.crawlUrl('https://example.com', {
      maxDepth: 2,
      limit: 5
    })
    console.log('✅ Crawl initiated successfully!')
    console.log('Result:', result)

    if (result.success && result.id) {
      // Test crawl status
      console.log('\n🔍 Testing crawl status...')
      const statusResult = await firecrawl.checkCrawlStatus(result.id)
      console.log('✅ Crawl status check successful!')
      console.log('Status:', statusResult)

      // Test crawl errors
      console.log('\n🔍 Testing crawl errors...')
      const errorsResult = await firecrawl.checkCrawlErrors(result.id)
      console.log('✅ Crawl errors check successful!')
      console.log('Errors:', errorsResult)

      // Test crawl cancellation
      console.log('\n🔍 Testing crawl cancellation...')
      const cancelResult = await firecrawl.cancelCrawl(result.id)
      console.log('✅ Crawl cancellation successful!')
      console.log('Result:', cancelResult)
    }
  } catch (err) {
    console.error('❌ Crawl operations failed:', err)
  }
}
|
||||
|
||||
// =============================================
|
||||
// Test 4: Extract
|
||||
// =============================================
|
||||
/**
 * Runs one extract request with a pricing schema and, when it returns an id,
 * performs a single status check. Failures are logged, not thrown.
 */
async function testExtract() {
  console.log('\n🔍 Testing extract...')
  try {
    const result = await firecrawl.extract(['https://firecrawl.dev'], {
      prompt: 'Extract the pricing information from the website',
      schema: z.object({
        pricing: z.object({
          free: z.object({
            price: z.number(),
            features: z.array(z.string())
          }),
          pro: z.object({
            price: z.number(),
            features: z.array(z.string())
          })
        })
      }),
      enableWebSearch: false,
      ignoreSitemap: false,
      includeSubdomains: true,
      showSources: false,
      scrapeOptions: {
        formats: ['markdown'],
        onlyMainContent: true,
        blockAds: true,
        proxy: 'basic',
        location: {
          country: 'US',
          languages: ['en-US']
        }
      }
    })
    console.log('✅ Extract successful!')
    console.log('Result:', result)

    if (result.success && result.id) {
      // Test extract status
      console.log('\n🔍 Testing extract status...')
      const statusResult = await firecrawl.checkExtractStatus(result.id)
      console.log('✅ Extract status check successful!')
      console.log('Status:', statusResult)
    }
  } catch (err) {
    console.error('❌ Extract failed:', err)
  }
}
|
||||
|
||||
/**
 * Runs one extract request and then polls its status every 5 seconds until it
 * leaves 'processing' or the attempt budget is exhausted. Failures are logged,
 * not thrown.
 */
async function testExtractUntilCompletion() {
  // Bound the polling so a job stuck in 'processing' cannot hang the script.
  const pollIntervalMs = 5000
  const maxPolls = 60

  console.log('\n🔍 Testing extract until completion...')
  try {
    const result = await firecrawl.extract(['https://firecrawl.dev'], {
      prompt: 'Extract the pricing information from the website',
      schema: z.object({
        pricing: z.object({
          free: z.object({
            price: z.number(),
            features: z.array(z.string())
          }),
          pro: z.object({
            price: z.number(),
            features: z.array(z.string())
          })
        })
      }),
      enableWebSearch: false,
      ignoreSitemap: false,
      includeSubdomains: true,
      showSources: false,
      scrapeOptions: {
        formats: ['markdown'],
        onlyMainContent: true,
        blockAds: true,
        proxy: 'basic',
        location: {
          country: 'US',
          languages: ['en-US']
        }
      }
    })
    console.log('✅ Extract successful!')
    console.log('Result:', result)

    if (result.success && result.id) {
      // Test extract status
      console.log('\n🔍 Testing extract status...')
      let statusResult = await firecrawl.checkExtractStatus(result.id)

      let polls = 0
      while (statusResult.status === 'processing' && polls < maxPolls) {
        // wait 5 seconds and check again
        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs))
        statusResult = await firecrawl.checkExtractStatus(result.id)
        polls++
      }

      if (statusResult.status === 'processing') {
        console.error(
          `❌ Extract still processing after ${maxPolls} status checks; giving up.`
        )
      } else {
        console.log('✅ Extract status check successful!')
      }
      console.log('Status:', statusResult)
    }
  } catch (err) {
    console.error('❌ Extract failed:', err)
  }
}
|
||||
|
||||
// =============================================
|
||||
// Run all tests
|
||||
// =============================================
|
||||
console.log('🚀 Starting FirecrawlClient tests...\n')

// Run tests sequentially: each call is awaited before the next one starts.
await testUrlScraping()
await testSearch()
await testCrawlUrl()
await testExtract()
await testExtractUntilCompletion()

console.log('\n🏁 All tests completed!')
|
Ładowanie…
Reference in New Issue