Merge pull request #696 from ftonato/refactor/update-firecrawl-api-to-v1

feat: update Firecrawl API version
pull/699/head
Travis Fischer 2025-03-18 20:41:54 +08:00 zatwierdzone przez GitHub
commit cfc1210e21
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
4 zmienionych plików z 936 dodań i 186 usunięć

Wyświetl plik

@ -2,29 +2,24 @@
"name": "@agentic/firecrawl",
"version": "7.3.5",
"description": "Agentic SDK for Firecrawl.",
"author": "Travis Fischer <travis@transitivebullsh.it>",
"authors": [
"Travis Fischer <travis@transitivebullsh.it>",
"Ademílson Tonato <ademilsonft@outlook.com>"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "git+https://github.com/transitive-bullshit/agentic.git"
},
"type": "module",
"source": "./src/index.ts",
"types": "./dist/index.d.ts",
"sideEffects": false,
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"default": "./dist/index.js"
}
},
"main": "./dist/index.js",
"module": "./dist/index.mjs",
"types": "./dist/index.d.ts",
"files": [
"dist"
"dist/**"
],
"scripts": {
"build": "tsup --config ../../tsup.config.ts",
"dev": "tsup --config ../../tsup.config.ts --watch",
"build": "tsup",
"clean": "del dist",
"test": "run-s test:*",
"test:lint": "eslint .",
@ -39,7 +34,10 @@
"zod": "^3.24.2"
},
"devDependencies": {
"@agentic/tsconfig": "workspace:*"
"@agentic/tsconfig": "workspace:*",
"@types/node": "^20.11.16",
"tsup": "^8.0.1",
"typescript": "^5.3.3"
},
"publishConfig": {
"access": "public"

Wyświetl plik

@ -1,16 +1,396 @@
// import type * as z from 'zod'
import {
aiFunction,
AIFunctionsProvider,
assert,
delay,
getEnv,
isZodSchema,
throttleKy,
zodToJsonSchema
} from '@agentic/core'
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import { type z } from 'zod'
/**
 * Configuration interface for FirecrawlClient.
 */
export interface FirecrawlClientConfig {
  /** Firecrawl API key used for Bearer authentication. */
  apiKey?: string
  /** Override for the Firecrawl API base URL (e.g. a self-hosted instance). */
  apiBaseUrl?: string
}
/**
 * Metadata for a Firecrawl document.
 *
 * Mirrors the page metadata returned by the API: standard HTML meta tags,
 * Open Graph (og*) tags, Dublin Core (dc* / dcterms*) tags, plus
 * request-level info such as the source URL and HTTP status code.
 */
export interface FirecrawlDocumentMetadata {
  title?: string
  description?: string
  language?: string
  keywords?: string
  robots?: string
  // Open Graph tags
  ogTitle?: string
  ogDescription?: string
  ogUrl?: string
  ogImage?: string
  ogAudio?: string
  ogDeterminer?: string
  ogLocale?: string
  ogLocaleAlternate?: string[]
  ogSiteName?: string
  ogVideo?: string
  // Dublin Core tags
  dctermsCreated?: string
  dcDateCreated?: string
  dcDate?: string
  dctermsType?: string
  dcType?: string
  dctermsAudience?: string
  dctermsSubject?: string
  dcSubject?: string
  dcDescription?: string
  dctermsKeywords?: string
  modifiedTime?: string
  publishedTime?: string
  articleTag?: string
  articleSection?: string
  /** URL the document was scraped from. */
  sourceURL?: string
  /** HTTP status code of the scrape request. */
  statusCode?: number
  error?: string
  // Open-ended: the API may return additional metadata keys not listed above.
  [key: string]: any
}
/**
 * Document interface for Firecrawl.
 *
 * @typeParam T - shape of the LLM-extracted payload (`extract` / `json`)
 * @typeParam ActionsSchema - result type of page actions, when actions ran
 */
export interface FirecrawlDocument<
  T = any,
  ActionsSchema extends ActionsResult | never = never
> {
  url?: string
  /** Page content as markdown (when the 'markdown' format was requested). */
  markdown?: string
  html?: string
  rawHtml?: string
  links?: string[]
  /** LLM-extracted structured data. */
  extract?: T
  json?: T
  // NOTE(review): presumably a base64 payload or URL for the screenshot — confirm against API docs.
  screenshot?: string
  metadata?: FirecrawlDocumentMetadata
  actions: ActionsSchema
  title?: string
  description?: string
}
/**
 * Parameters for scraping operations.
 * Defines the options and configurations available for scraping web content.
 */
export interface ScrapeOptions {
  /** Output formats to include in the scrape response. */
  formats?: (
    | 'markdown'
    | 'html'
    | 'rawHtml'
    | 'content'
    | 'links'
    | 'screenshot'
    | 'screenshot@fullPage'
    | 'extract'
    | 'json'
  )[]
  /** Extra HTTP headers to send when fetching the page. */
  headers?: Record<string, string>
  /** Tags to keep in the output. */
  includeTags?: string[]
  /** Tags to strip from the output. */
  excludeTags?: string[]
  onlyMainContent?: boolean
  /** Milliseconds to wait before capturing the page. */
  waitFor?: number
  timeout?: number
  /** Geographic hints for the request. */
  location?: {
    country?: string
    languages?: string[]
  }
  /** Emulate a mobile device. */
  mobile?: boolean
  skipTlsVerification?: boolean
  /** Strip inline base64 images from the output. */
  removeBase64Images?: boolean
  blockAds?: boolean
  proxy?: 'basic' | 'stealth'
}
/**
 * Parameters for scraping operations.
 *
 * Superset of {@link ScrapeOptions} that additionally supports LLM
 * extraction (`extract` / `jsonOptions`) and page actions.
 *
 * @typeParam LLMSchema - zod schema describing the extraction output
 * @typeParam ActionsSchema - list of page actions to run before scraping
 */
export interface ScrapeParams<
  LLMSchema extends z.ZodSchema = any,
  ActionsSchema extends Action[] | undefined = undefined
> {
  formats?: (
    | 'markdown'
    | 'html'
    | 'rawHtml'
    | 'content'
    | 'links'
    | 'screenshot'
    | 'screenshot@fullPage'
    | 'extract'
    | 'json'
  )[]
  headers?: Record<string, string>
  includeTags?: string[]
  excludeTags?: string[]
  onlyMainContent?: boolean
  waitFor?: number
  timeout?: number
  location?: {
    country?: string
    languages?: string[]
  }
  mobile?: boolean
  skipTlsVerification?: boolean
  removeBase64Images?: boolean
  blockAds?: boolean
  proxy?: 'basic' | 'stealth'
  /** LLM extraction options; `schema` is converted to JSON schema before sending. */
  extract?: {
    prompt?: string
    schema?: LLMSchema
    systemPrompt?: string
  }
  /** JSON-format extraction options; `schema` is converted to JSON schema before sending. */
  jsonOptions?: {
    prompt?: string
    schema?: LLMSchema
    systemPrompt?: string
  }
  /** Browser actions to execute before the page is scraped. */
  actions?: ActionsSchema
}
/**
 * A single browser action that can be executed on the page before scraping.
 * Discriminated on the `type` tag.
 */
export type Action =
  | {
      type: 'wait'
      milliseconds?: number
      selector?: string
    }
  | {
      type: 'click'
      selector: string
    }
  | {
      type: 'screenshot'
      fullPage?: boolean
    }
  | {
      type: 'write'
      text: string
    }
  | {
      type: 'press'
      key: string
    }
  | {
      type: 'scroll'
      direction?: 'up' | 'down'
      selector?: string
    }
  | {
      type: 'scrape'
    }
  | {
      type: 'executeJavascript'
      script: string
    }
/**
 * Results produced by page actions (currently only screenshots).
 */
export interface ActionsResult {
  screenshots: string[]
}
/**
 * Response interface for scraping operations.
 *
 * Extends the scraped document with success/warning/error information.
 */
export interface ScrapeResponse<
  LLMResult = any,
  ActionsSchema extends ActionsResult | never = never
> extends FirecrawlDocument<LLMResult, ActionsSchema> {
  success: true
  warning?: string
  error?: string
}
/**
 * Parameters for search operations.
 */
export interface SearchParams {
  /** Maximum number of results to return. */
  limit?: number
  // NOTE(review): presumably a time-based search filter (Google `tbs` syntax) — confirm.
  tbs?: string
  filter?: string
  lang?: string
  country?: string
  location?: string
  origin?: string
  timeout?: number
  /** Options used when scraping each search result. */
  scrapeOptions?: ScrapeParams
}
/**
 * Response interface for search operations.
 */
export interface SearchResponse {
  success: boolean
  data: FirecrawlDocument<undefined>[]
  warning?: string
  error?: string
}
/**
 * Parameters for crawling operations.
 */
export interface CrawlParams {
  includePaths?: string[]
  excludePaths?: string[]
  maxDepth?: number
  maxDiscoveryDepth?: number
  /** Maximum number of pages to crawl. */
  limit?: number
  allowBackwardLinks?: boolean
  allowExternalLinks?: boolean
  ignoreSitemap?: boolean
  /** Options applied when scraping each crawled page. */
  scrapeOptions?: ScrapeParams
  /** Webhook URL, or full webhook config, notified of crawl events. */
  webhook?:
    | string
    | {
        url: string
        headers?: Record<string, string>
        metadata?: Record<string, string>
        events?: ['completed', 'failed', 'page', 'started'][number][]
      }
  deduplicateSimilarURLs?: boolean
  ignoreQueryParameters?: boolean
  regexOnFullURL?: boolean
}
/**
 * Response interface for crawling operations.
 */
export interface CrawlResponse {
  /** Crawl job ID; use with checkCrawlStatus / checkCrawlErrors / cancelCrawl. */
  id?: string
  url?: string
  success: true
  error?: string
}
/**
 * Response interface for job status checks.
 */
export interface CrawlStatusResponse {
  success: true
  status: 'scraping' | 'completed' | 'failed' | 'cancelled'
  /** Number of pages completed so far. */
  completed: number
  total: number
  creditsUsed: number
  expiresAt: Date
  /** Pagination cursor for the next batch of results, when present. */
  next?: string
  data: FirecrawlDocument<undefined>[]
}
/**
 * Response interface for crawl errors.
 */
export interface CrawlErrorsResponse {
  /** Per-URL errors encountered during the crawl. */
  errors: {
    id: string
    timestamp?: string
    url: string
    error: string
  }[]
  // NOTE(review): presumably URLs skipped due to robots.txt rules — confirm.
  robotsBlocked: string[]
}
/**
 * Error response interface.
 */
export interface ErrorResponse {
  success: false
  error: string
}
/**
 * Custom error class for Firecrawl API failures.
 *
 * Carries the HTTP-style status code of the failed request and an optional
 * structured `details` payload returned by the API.
 */
export class FirecrawlError extends Error {
  statusCode: number
  details?: any

  constructor(message: string, statusCode: number, details?: any) {
    super(message)
    // Set the name so logs/stack traces identify this error type instead of
    // the generic "Error".
    this.name = 'FirecrawlError'
    this.statusCode = statusCode
    this.details = details
  }
}
/**
 * Parameters for extracting information from URLs.
 *
 * @typeParam T - zod schema describing the expected extraction output
 */
export interface ExtractParams<T extends z.ZodSchema = any> {
  prompt: string
  /** Zod schema for the structured output; converted to JSON schema before sending. */
  schema?: T
  enableWebSearch?: boolean
  ignoreSitemap?: boolean
  includeSubdomains?: boolean
  showSources?: boolean
  scrapeOptions?: ScrapeOptions
}
/**
 * Response interface for extracting information from URLs.
 * Defines the structure of the response received after extracting information from URLs.
 */
export interface ExtractResponse<T = any> {
  success: boolean
  /** Job ID for asynchronous extraction; poll with checkExtractStatus. */
  id?: string
  data: T
  error?: string
  warning?: string
  sources?: string[]
}
/**
 * Response interface for extract status operations.
 */
export interface ExtractStatusResponse<T = any> {
  success: boolean
  status: 'processing' | 'completed' | 'failed'
  data?: T
  error?: string
  expiresAt?: string
}
/**
 * Parameters for LLMs.txt generation operations.
 */
export interface GenerateLLMsTextParams {
  /**
   * Maximum number of URLs to process (1-100)
   * @default 10
   */
  maxUrls?: number
  /**
   * Whether to show the full LLMs-full.txt in the response
   * @default false
   */
  showFullText?: boolean
}
/**
 * Response interface for LLMs.txt generation operations.
 */
export interface GenerateLLMsTextResponse {
  success: boolean
  /** Job ID of the generation operation. */
  id: string
}
/**
 * Status response interface for LLMs.txt generation operations.
 */
export interface GenerateLLMsTextStatusResponse {
  success: boolean
  data: {
    /** The generated LLMs.txt content. */
    llmstxt: string
    /** Full LLMs-full.txt content (only when showFullText was requested). */
    llmsfulltxt?: string
  }
  status: 'processing' | 'completed' | 'failed'
  error?: string
  expiresAt: string
}
export namespace firecrawl {
export const BASE_URL = 'https://api.firecrawl.dev'
@ -21,79 +401,6 @@ export namespace firecrawl {
interval: 1200,
strict: true
})
/**
* Generic parameter interface.
*/
export interface Params {
extractorOptions?: {
extractionSchema: z.ZodSchema | any
mode?: 'llm-extraction'
extractionPrompt?: string
}
}
/**
* Response interface for scraping operations.
*/
export interface ScrapeResponse {
success: boolean
data?: Data
error?: string
}
export interface Data {
content?: string
markdown?: string
html?: string
metadata: Metadata
}
export interface Metadata {
title: string
description: string
keywords?: string
robots?: string
ogTitle?: string
ogDescription?: string
ogUrl?: string
ogImage?: string
ogLocaleAlternate?: any[]
ogSiteName?: string
sourceURL?: string
modifiedTime?: string
publishedTime?: string
}
/**
* Response interface for searching operations.
*/
export interface SearchResponse {
success: boolean
data?: any
error?: string
}
/**
* Response interface for crawling operations.
*/
export interface CrawlResponse {
success: boolean
jobId?: string
data?: any
error?: string
}
/**
* Response interface for job status checks.
*/
export interface JobStatusResponse {
success: boolean
status: string
jobId?: string
data?: any
error?: string
}
}
/**
@ -140,124 +447,363 @@ export class FirecrawlClient extends AIFunctionsProvider {
prefixUrl: apiBaseUrl,
timeout: timeoutMs,
headers: {
Authorization: `Bearer ${this.apiKey}`
Authorization: `Bearer ${this.apiKey}`,
'X-Origin': 'agentic',
'X-Origin-Type': 'integration'
}
})
}
/**
* Scrape the contents of a URL.
* Sends a POST request.
*/
@aiFunction({
name: 'firecrawl_scrape_url',
description: 'Scrape the contents of a URL.',
inputSchema: z.object({
url: z.string().url().describe('The URL to scrape.')
})
})
async scrapeUrl(
opts: {
url: string
} & firecrawl.Params
) {
const json = {
...opts
/**
 * Sends a POST request to the Firecrawl API and returns the parsed JSON body.
 *
 * @param path - API path relative to the client's base URL
 * @param data - JSON payload for the request body
 * @throws {FirecrawlError} when the API returns a structured error body
 */
private async postRequest(path: string, data: any): Promise<any> {
  try {
    const response = await this.ky.post(path, { json: data })
    return await response.json()
  } catch (err) {
    if (err instanceof Error) {
      // Surface the API's structured error body when available. If the body
      // is missing or not valid JSON, fall through and rethrow the ORIGINAL
      // error instead of masking it with a JSON parse failure.
      let body: any
      try {
        body = await (err as any).response?.json()
      } catch {}
      if (body?.error) {
        throw new FirecrawlError(
          `Request failed. Error: ${body.error}`,
          (err as any).response?.status ?? 500,
          body?.details
        )
      }
    }
    throw err
  }
}
if (opts?.extractorOptions?.extractionSchema) {
let schema = opts.extractorOptions.extractionSchema
if (isZodSchema(schema)) {
/**
 * Sends a GET request to the Firecrawl API and returns the parsed JSON body.
 *
 * @param path - API path relative to the client's base URL
 * @throws {FirecrawlError} when the API returns a structured error body
 */
private async getRequest(path: string): Promise<any> {
  try {
    const response = await this.ky.get(path)
    return await response.json()
  } catch (err) {
    if (err instanceof Error) {
      // Surface the API's structured error body when available. If the body
      // is missing or not valid JSON, fall through and rethrow the ORIGINAL
      // error instead of masking it with a JSON parse failure.
      let body: any
      try {
        body = await (err as any).response?.json()
      } catch {}
      if (body?.error) {
        throw new FirecrawlError(
          `Request failed. Error: ${body.error}`,
          (err as any).response?.status ?? 500,
          body?.details
        )
      }
    }
    throw err
  }
}
/**
 * Sends a DELETE request to the Firecrawl API and returns the parsed JSON body.
 *
 * @param path - API path relative to the client's base URL
 * @throws {FirecrawlError} when the API returns a structured error body
 */
private async deleteRequest(path: string): Promise<any> {
  try {
    const response = await this.ky.delete(path)
    return await response.json()
  } catch (err) {
    if (err instanceof Error) {
      // Surface the API's structured error body when available. If the body
      // is missing or not valid JSON, fall through and rethrow the ORIGINAL
      // error instead of masking it with a JSON parse failure.
      let body: any
      try {
        body = await (err as any).response?.json()
      } catch {}
      if (body?.error) {
        throw new FirecrawlError(
          `Request failed. Error: ${body.error}`,
          (err as any).response?.status ?? 500,
          body?.details
        )
      }
    }
    throw err
  }
}
/**
* Scrapes a URL using the Firecrawl API.
*/
async scrapeUrl<
T extends z.ZodSchema,
ActionsSchema extends Action[] | undefined = undefined
>(
url: string,
params?: ScrapeParams<T, ActionsSchema>
): Promise<
| ScrapeResponse<
z.infer<T>,
ActionsSchema extends Action[] ? ActionsResult : never
>
| ErrorResponse
> {
let jsonData: any = { url, ...params }
if (jsonData?.extract?.schema) {
let schema = jsonData.extract.schema
try {
schema = zodToJsonSchema(schema)
}
json.extractorOptions = {
mode: 'llm-extraction',
...opts.extractorOptions,
extractionSchema: schema
} catch {}
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema
}
}
}
return this.ky.post('v0/scrape', { json }).json<firecrawl.ScrapeResponse>()
}
async search(
opts: {
query: string
} & firecrawl.Params
) {
return this.ky
.post('v0/search', { json: opts })
.json<firecrawl.SearchResponse>()
}
async crawlUrl({
waitUntilDone = true,
timeoutMs = 30_000,
idempotencyKey,
...params
}: {
url: string
waitUntilDone?: boolean
timeoutMs?: number
idempotencyKey?: string
} & firecrawl.Params) {
const res = await this.ky
.post('v0/crawl', {
json: params,
timeout: timeoutMs,
headers: idempotencyKey
? {
'x-idempotency-key': idempotencyKey
}
: undefined
})
.json<firecrawl.CrawlResponse>()
assert(res.jobId)
if (waitUntilDone) {
return this.waitForCrawlJob({ jobId: res.jobId, timeoutMs })
if (jsonData?.jsonOptions?.schema) {
let schema = jsonData.jsonOptions.schema
try {
schema = zodToJsonSchema(schema)
} catch {}
jsonData = {
...jsonData,
jsonOptions: {
...jsonData.jsonOptions,
schema
}
}
}
return res
try {
const response = await this.postRequest('v1/scrape', jsonData)
return response
} catch (err) {
if (err instanceof FirecrawlError) {
throw err
}
throw new FirecrawlError(
err instanceof Error ? err.message : 'Unknown error',
500
)
}
}
async checkCrawlStatus(jobId: string) {
assert(jobId)
/**
* Searches using the Firecrawl API.
*/
async search(query: string, params?: SearchParams): Promise<SearchResponse> {
const jsonData = {
query,
limit: params?.limit ?? 5,
tbs: params?.tbs,
filter: params?.filter,
lang: params?.lang ?? 'en',
country: params?.country ?? 'us',
location: params?.location,
origin: params?.origin ?? 'api',
timeout: params?.timeout ?? 60_000,
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
}
return this.ky
.get(`v0/crawl/status/${jobId}`)
.json<firecrawl.JobStatusResponse>()
try {
const response = await this.postRequest('v1/search', jsonData)
if (response.success) {
return {
success: true,
data: response.data as FirecrawlDocument<any>[],
warning: response.warning
}
} else {
throw new FirecrawlError(
`Failed to search. Error: ${response.error}`,
500
)
}
} catch (err: any) {
if (err.response?.data?.error) {
throw new FirecrawlError(
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
err.response.status
)
} else {
throw new FirecrawlError(err.message, 500)
}
}
return { success: false, error: 'Internal server error.', data: [] }
}
async waitForCrawlJob({
jobId,
timeoutMs = 60_000
}: {
jobId: string
timeoutMs?: number
}) {
assert(jobId)
/**
* Initiates a crawl job for a URL.
*/
async crawlUrl(
url: string,
params?: CrawlParams
): Promise<CrawlResponse | ErrorResponse> {
const jsonData = { url, ...params }
const start = Date.now()
do {
const res = await this.checkCrawlStatus(jobId)
if (res.status === 'completed') {
return res
}
if (!['active', 'paused', 'pending', 'queued'].includes(res.status)) {
throw new Error(
`Crawl job "${jobId}" failed or was stopped. Status: ${res.status}`
try {
const response = await this.postRequest('v1/crawl', jsonData)
if (response.success) {
return response
} else {
throw new FirecrawlError(
`Failed to start crawl job. Error: ${response.error}`,
500
)
}
} catch (err: any) {
if (err.response?.data?.error) {
throw new FirecrawlError(
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
err.response.status
)
} else {
throw new FirecrawlError(err.message, 500)
}
}
return { success: false, error: 'Internal server error.' }
}
if (Date.now() - start > timeoutMs) {
throw new Error(
`Timeout waiting for crawl job "${jobId}" to complete: ${res.status}`
/**
 * Checks the status of a crawl job.
 *
 * @param id - ID of the crawl job to check
 * @returns The crawl status payload on success
 * @throws {FirecrawlError} when the ID is missing or the request fails
 */
async checkCrawlStatus(
  id: string
): Promise<CrawlStatusResponse | ErrorResponse> {
  if (!id) {
    throw new FirecrawlError('No crawl ID provided', 400)
  }

  try {
    const response = await this.getRequest(`v1/crawl/${id}`)
    if (response.success) {
      return response
    } else {
      throw new FirecrawlError(
        `Failed to check crawl status. Error: ${response.error}`,
        500
      )
    }
  } catch (err) {
    // Rethrow FirecrawlError as-is so status codes and details raised by
    // getRequest are preserved instead of being re-wrapped with a generic 500
    // (matches the error-handling pattern used by extract()).
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
await delay(1000)
} while (true)
/**
 * Returns information about crawl errors for a given crawl job.
 *
 * @param id - ID of the crawl job
 * @returns Per-URL errors and robots-blocked URLs for the job
 * @throws {FirecrawlError} when the request fails
 */
async checkCrawlErrors(
  id: string
): Promise<CrawlErrorsResponse | ErrorResponse> {
  try {
    const response = await this.getRequest(`v1/crawl/${id}/errors`)
    if (response.errors) {
      return response
    } else {
      throw new FirecrawlError(
        `Failed to check crawl errors. Error: ${response.error}`,
        500
      )
    }
  } catch (err) {
    // Rethrow FirecrawlError as-is so status codes and details raised by
    // getRequest are preserved instead of being re-wrapped with a generic 500
    // (matches the error-handling pattern used by extract()).
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
/**
 * Cancels a crawl job.
 *
 * @param id - ID of the crawl job to cancel
 * @returns The API's cancellation response
 * @throws {FirecrawlError} when the request fails
 */
async cancelCrawl(id: string): Promise<ErrorResponse> {
  try {
    const response = await this.deleteRequest(`v1/crawl/${id}`)
    if (response.status) {
      return response
    } else {
      throw new FirecrawlError(
        `Failed to cancel crawl job. Error: ${response.error}`,
        500
      )
    }
  } catch (err) {
    // Rethrow FirecrawlError as-is so status codes and details raised by
    // deleteRequest are preserved instead of being re-wrapped with a generic
    // 500 (matches the error-handling pattern used by extract()).
    if (err instanceof FirecrawlError) {
      throw err
    }
    throw new FirecrawlError(
      err instanceof Error ? err.message : 'Unknown error',
      500
    )
  }
}
/**
 * Extracts structured data from URLs using LLMs.
 *
 * @param urls - Array of URLs to extract data from
 * @param params - Additional parameters for the extract request
 * @returns The response from the extract operation
 * @throws {FirecrawlError} when the extract request fails
 */
async extract<T extends z.ZodSchema>(
  urls: string[],
  params: ExtractParams<T>
): Promise<ExtractResponse<z.infer<T>>> {
  // The API expects a JSON schema, so convert the zod schema when provided.
  const payload = {
    urls,
    ...params,
    schema: params.schema ? zodToJsonSchema(params.schema) : undefined
  }

  try {
    const response = await this.postRequest('v1/extract', payload)
    if (response.success) {
      return response
    }
    throw new FirecrawlError(response.error || 'Extract operation failed', 500)
  } catch (err) {
    // Propagate FirecrawlError untouched; wrap anything else.
    if (err instanceof FirecrawlError) {
      throw err
    }
    const message = err instanceof Error ? err.message : 'Unknown error'
    throw new FirecrawlError(message, 500)
  }
}
/**
 * Checks the status of an extract operation.
 *
 * @param id - ID of the extract job to check
 * @returns The extract status payload
 * @throws {FirecrawlError} when the ID is missing or the request fails
 */
async checkExtractStatus<T = any>(
  id: string
): Promise<ExtractStatusResponse<T>> {
  if (!id) {
    throw new FirecrawlError('No extract ID provided', 400)
  }

  try {
    return await this.getRequest(`v1/extract/${id}`)
  } catch (err) {
    // Propagate FirecrawlError untouched; wrap anything else.
    if (err instanceof FirecrawlError) {
      throw err
    }
    const message = err instanceof Error ? err.message : 'Unknown error'
    throw new FirecrawlError(message, 500)
  }
}
/**
 * Generates LLMs.txt for a given URL.
 *
 * @param url - URL to generate LLMs.txt for
 * @param params - Generation options (maxUrls, showFullText)
 * @returns The generation status response
 * @throws {FirecrawlError} when the request fails
 */
async generateLLMsText(
  url: string,
  params: GenerateLLMsTextParams
): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  try {
    return await this.postRequest('v1/llmstxt', { url, ...params })
  } catch (err) {
    // Propagate FirecrawlError untouched; wrap anything else.
    if (err instanceof FirecrawlError) {
      throw err
    }
    const message = err instanceof Error ? err.message : 'Unknown error'
    throw new FirecrawlError(message, 500)
  }
}
}

Wyświetl plik

@ -1 +1,13 @@
export * from './firecrawl-client'
export type {
ErrorResponse,
ExtractParams,
ExtractResponse,
ExtractStatusResponse,
FirecrawlClientConfig,
GenerateLLMsTextParams,
GenerateLLMsTextResponse,
GenerateLLMsTextStatusResponse,
ScrapeParams,
ScrapeResponse
} from './firecrawl-client.js'
export { FirecrawlClient } from './firecrawl-client.js'

Wyświetl plik

@ -0,0 +1,194 @@
import { z } from 'zod'
import { FirecrawlClient } from './dist/index.mjs'
// Initialize the client with the API key
const apiKey = 'FIRECRAWL-API-KEY'
const firecrawl = new FirecrawlClient({ apiKey })
// =============================================
// Test 1: URL Scraping
// =============================================
// Scrape a single URL and log the returned document (or the failure).
async function testUrlScraping() {
  console.log('🔍 Testing URL scraping...')

  try {
    const result = await firecrawl.scrapeUrl('https://mairistumpf.com')
    console.log('✅ URL scraping successful!')
    console.log('Result:', result)
  } catch (err) {
    console.error('❌ URL scraping failed:', err)
  }
}
// =============================================
// Test 2: Search
// =============================================
// Run a search query and log both the documents and their count.
async function testSearch() {
  console.log('\n🔍 Testing search...')

  try {
    const result = await firecrawl.search('artificial intelligence news', {
      limit: 5,
      lang: 'en',
      country: 'us'
    })
    console.log('✅ Search successful!')
    console.log('Results:', result.data)
    // Also log how many documents came back.
    console.log('Results:', result.data.length)
  } catch (err) {
    console.error('❌ Search failed:', err)
  }
}
// =============================================
// Test 3: Crawl URL
// =============================================
// Start a crawl, then exercise the status, errors, and cancel endpoints
// against the returned job ID. Cancelling also cleans up the job we started.
async function testCrawlUrl() {
  console.log('\n🔍 Testing URL crawling...')

  try {
    const result = await firecrawl.crawlUrl('https://example.com', {
      maxDepth: 2,
      limit: 5
    })
    console.log('✅ Crawl initiated successfully!')
    console.log('Result:', result)

    // The follow-up calls need a job ID, which only exists on success.
    if (result.success && result.id) {
      // Test crawl status
      console.log('\n🔍 Testing crawl status...')
      const statusResult = await firecrawl.checkCrawlStatus(result.id)
      console.log('✅ Crawl status check successful!')
      console.log('Status:', statusResult)

      // Test crawl errors
      console.log('\n🔍 Testing crawl errors...')
      const errorsResult = await firecrawl.checkCrawlErrors(result.id)
      console.log('✅ Crawl errors check successful!')
      console.log('Errors:', errorsResult)

      // Test crawl cancellation
      console.log('\n🔍 Testing crawl cancellation...')
      const cancelResult = await firecrawl.cancelCrawl(result.id)
      console.log('✅ Crawl cancellation successful!')
      console.log('Result:', cancelResult)
    }
  } catch (err) {
    console.error('❌ Crawl operations failed:', err)
  }
}
// =============================================
// Test 4: Extract
// =============================================
// Extract structured pricing data from a URL using a zod schema, then check
// the status of the extract job once (no polling — see
// testExtractUntilCompletion for the polling variant).
async function testExtract() {
  console.log('\n🔍 Testing extract...')

  try {
    const result = await firecrawl.extract(['https://firecrawl.dev'], {
      prompt: 'Extract the pricing information from the website',
      // Expected shape of the extracted data.
      schema: z.object({
        pricing: z.object({
          free: z.object({
            price: z.number(),
            features: z.array(z.string())
          }),
          pro: z.object({
            price: z.number(),
            features: z.array(z.string())
          })
        })
      }),
      enableWebSearch: false,
      ignoreSitemap: false,
      includeSubdomains: true,
      showSources: false,
      scrapeOptions: {
        formats: ['markdown'],
        onlyMainContent: true,
        blockAds: true,
        proxy: 'basic',
        location: {
          country: 'US',
          languages: ['en-US']
        }
      }
    })
    console.log('✅ Extract successful!')
    console.log('Result:', result)

    if (result.success && result.id) {
      // Test extract status
      console.log('\n🔍 Testing extract status...')
      const statusResult = await firecrawl.checkExtractStatus(result.id)
      console.log('✅ Extract status check successful!')
      console.log('Status:', statusResult)
    }
  } catch (err) {
    console.error('❌ Extract failed:', err)
  }
}
// Run an extract job and poll checkExtractStatus until it leaves the
// 'processing' state, so the final result (not just the job ID) is printed.
async function testExtractUntilCompletion() {
  console.log('\n🔍 Testing extract...')

  try {
    const result = await firecrawl.extract(['https://firecrawl.dev'], {
      prompt: 'Extract the pricing information from the website',
      // Expected shape of the extracted data.
      schema: z.object({
        pricing: z.object({
          free: z.object({
            price: z.number(),
            features: z.array(z.string())
          }),
          pro: z.object({
            price: z.number(),
            features: z.array(z.string())
          })
        })
      }),
      enableWebSearch: false,
      ignoreSitemap: false,
      includeSubdomains: true,
      showSources: false,
      scrapeOptions: {
        formats: ['markdown'],
        onlyMainContent: true,
        blockAds: true,
        proxy: 'basic',
        location: {
          country: 'US',
          languages: ['en-US']
        }
      }
    })
    console.log('✅ Extract successful!')
    console.log('Result:', result)

    if (result.success && result.id) {
      // Test extract status
      console.log('\n🔍 Testing extract status...')

      // Poll every 5 seconds, but cap the attempts (~5 minutes) so a stuck
      // job cannot hang the test run forever.
      const maxAttempts = 60
      let attempts = 0
      let statusResult = await firecrawl.checkExtractStatus(result.id)
      while (statusResult.status === 'processing' && attempts < maxAttempts) {
        await new Promise((resolve) => setTimeout(resolve, 5000))
        statusResult = await firecrawl.checkExtractStatus(result.id)
        attempts++
      }

      console.log('✅ Extract status check successful!')
      console.log('Status:', statusResult)
    }
  } catch (err) {
    console.error('❌ Extract failed:', err)
  }
}
// =============================================
// Run all tests
// =============================================
console.log('🚀 Starting FirecrawlClient tests...\n')
// Run tests sequentially
await testUrlScraping()
await testSearch()
await testCrawlUrl()
await testExtract()
await testExtractUntilCompletion()
console.log('\n🏁 All tests completed!')