diff --git a/packages/firecrawl/package.json b/packages/firecrawl/package.json index 0c05817..2388284 100644 --- a/packages/firecrawl/package.json +++ b/packages/firecrawl/package.json @@ -2,29 +2,24 @@ "name": "@agentic/firecrawl", "version": "7.3.5", "description": "Agentic SDK for Firecrawl.", - "author": "Travis Fischer ", + "authors": [ + "Travis Fischer ", + "AdemΓ­lson Tonato " + ], "license": "MIT", "repository": { "type": "git", "url": "git+https://github.com/transitive-bullshit/agentic.git" }, - "type": "module", - "source": "./src/index.ts", - "types": "./dist/index.d.ts", "sideEffects": false, - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js", - "default": "./dist/index.js" - } - }, + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "types": "./dist/index.d.ts", "files": [ - "dist" + "dist/**" ], "scripts": { - "build": "tsup --config ../../tsup.config.ts", - "dev": "tsup --config ../../tsup.config.ts --watch", + "build": "tsup", "clean": "del dist", "test": "run-s test:*", "test:lint": "eslint .", @@ -39,7 +34,10 @@ "zod": "^3.24.2" }, "devDependencies": { - "@agentic/tsconfig": "workspace:*" + "@agentic/tsconfig": "workspace:*", + "@types/node": "^20.11.16", + "tsup": "^8.0.1", + "typescript": "^5.3.3" }, "publishConfig": { "access": "public" diff --git a/packages/firecrawl/src/firecrawl-client.ts b/packages/firecrawl/src/firecrawl-client.ts index 3b7c65e..e879196 100644 --- a/packages/firecrawl/src/firecrawl-client.ts +++ b/packages/firecrawl/src/firecrawl-client.ts @@ -1,16 +1,396 @@ +// import type * as z from 'zod' import { - aiFunction, AIFunctionsProvider, assert, - delay, getEnv, - isZodSchema, throttleKy, zodToJsonSchema } from '@agentic/core' import defaultKy, { type KyInstance } from 'ky' import pThrottle from 'p-throttle' -import { z } from 'zod' +import { type z } from 'zod' + +/** + * Configuration interface for FirecrawlClient. + */ +export interface FirecrawlClientConfig { + apiKey?: string + apiBaseUrl?: string +} + +/** + * Metadata for a Firecrawl document. + */ +export interface FirecrawlDocumentMetadata { + title?: string + description?: string + language?: string + keywords?: string + robots?: string + ogTitle?: string + ogDescription?: string + ogUrl?: string + ogImage?: string + ogAudio?: string + ogDeterminer?: string + ogLocale?: string + ogLocaleAlternate?: string[] + ogSiteName?: string + ogVideo?: string + dctermsCreated?: string + dcDateCreated?: string + dcDate?: string + dctermsType?: string + dcType?: string + dctermsAudience?: string + dctermsSubject?: string + dcSubject?: string + dcDescription?: string + dctermsKeywords?: string + modifiedTime?: string + publishedTime?: string + articleTag?: string + articleSection?: string + sourceURL?: string + statusCode?: number + error?: string + [key: string]: any +} + +/** + * Document interface for Firecrawl. + */ +export interface FirecrawlDocument< + T = any, + ActionsSchema extends ActionsResult | never = never +> { + url?: string + markdown?: string + html?: string + rawHtml?: string + links?: string[] + extract?: T + json?: T + screenshot?: string + metadata?: FirecrawlDocumentMetadata + actions: ActionsSchema + title?: string + description?: string +} + +/** + * Parameters for scraping operations. + * Defines the options and configurations available for scraping web content. + */ +export interface ScrapeOptions { + formats?: ( + | 'markdown' + | 'html' + | 'rawHtml' + | 'content' + | 'links' + | 'screenshot' + | 'screenshot@fullPage' + | 'extract' + | 'json' + )[] + headers?: Record + includeTags?: string[] + excludeTags?: string[] + onlyMainContent?: boolean + waitFor?: number + timeout?: number + location?: { + country?: string + languages?: string[] + } + mobile?: boolean + skipTlsVerification?: boolean + removeBase64Images?: boolean + blockAds?: boolean + proxy?: 'basic' | 'stealth' +} + +/** + * Parameters for scraping operations. + */ +export interface ScrapeParams< + LLMSchema extends z.ZodSchema = any, + ActionsSchema extends Action[] | undefined = undefined +> { + formats?: ( + | 'markdown' + | 'html' + | 'rawHtml' + | 'content' + | 'links' + | 'screenshot' + | 'screenshot@fullPage' + | 'extract' + | 'json' + )[] + headers?: Record + includeTags?: string[] + excludeTags?: string[] + onlyMainContent?: boolean + waitFor?: number + timeout?: number + location?: { + country?: string + languages?: string[] + } + mobile?: boolean + skipTlsVerification?: boolean + removeBase64Images?: boolean + blockAds?: boolean + proxy?: 'basic' | 'stealth' + extract?: { + prompt?: string + schema?: LLMSchema + systemPrompt?: string + } + jsonOptions?: { + prompt?: string + schema?: LLMSchema + systemPrompt?: string + } + actions?: ActionsSchema +} + +export type Action = + | { + type: 'wait' + milliseconds?: number + selector?: string + } + | { + type: 'click' + selector: string + } + | { + type: 'screenshot' + fullPage?: boolean + } + | { + type: 'write' + text: string + } + | { + type: 'press' + key: string + } + | { + type: 'scroll' + direction?: 'up' | 'down' + selector?: string + } + | { + type: 'scrape' + } + | { + type: 'executeJavascript' + script: string + } + +export interface ActionsResult { + screenshots: string[] +} + +/** + * Response interface for scraping operations. + */ +export interface ScrapeResponse< + LLMResult = any, + ActionsSchema extends ActionsResult | never = never +> extends FirecrawlDocument { + success: true + warning?: string + error?: string +} + +/** + * Parameters for search operations. + */ +export interface SearchParams { + limit?: number + tbs?: string + filter?: string + lang?: string + country?: string + location?: string + origin?: string + timeout?: number + scrapeOptions?: ScrapeParams +} + +/** + * Response interface for search operations. + */ +export interface SearchResponse { + success: boolean + data: FirecrawlDocument[] + warning?: string + error?: string +} + +/** + * Parameters for crawling operations. + */ +export interface CrawlParams { + includePaths?: string[] + excludePaths?: string[] + maxDepth?: number + maxDiscoveryDepth?: number + limit?: number + allowBackwardLinks?: boolean + allowExternalLinks?: boolean + ignoreSitemap?: boolean + scrapeOptions?: ScrapeParams + webhook?: + | string + | { + url: string + headers?: Record + metadata?: Record + events?: ['completed', 'failed', 'page', 'started'][number][] + } + deduplicateSimilarURLs?: boolean + ignoreQueryParameters?: boolean + regexOnFullURL?: boolean +} + +/** + * Response interface for crawling operations. + */ +export interface CrawlResponse { + id?: string + url?: string + success: true + error?: string +} + +/** + * Response interface for job status checks. + */ +export interface CrawlStatusResponse { + success: true + status: 'scraping' | 'completed' | 'failed' | 'cancelled' + completed: number + total: number + creditsUsed: number + expiresAt: Date + next?: string + data: FirecrawlDocument[] +} + +/** + * Response interface for crawl errors. + */ +export interface CrawlErrorsResponse { + errors: { + id: string + timestamp?: string + url: string + error: string + }[] + robotsBlocked: string[] +} + +/** + * Error response interface. + */ +export interface ErrorResponse { + success: false + error: string +} + +/** + * Custom error class for Firecrawl. + */ +export class FirecrawlError extends Error { + statusCode: number + details?: any + + constructor(message: string, statusCode: number, details?: any) { + super(message) + this.statusCode = statusCode + this.details = details + } +} + +/** + * Parameters for extracting information from URLs. + */ +export interface ExtractParams { + prompt: string + schema?: T + enableWebSearch?: boolean + ignoreSitemap?: boolean + includeSubdomains?: boolean + showSources?: boolean + scrapeOptions?: ScrapeOptions +} + +/** + * Response interface for extracting information from URLs. + * Defines the structure of the response received after extracting information from URLs. + */ +export interface ExtractResponse { + success: boolean + id?: string + data: T + error?: string + warning?: string + sources?: string[] +} + +/** + * Response interface for extract status operations. + */ +export interface ExtractStatusResponse { + success: boolean + status: 'processing' | 'completed' | 'failed' + data?: T + error?: string + expiresAt?: string +} +/** + * Parameters for LLMs.txt generation operations. + */ +export interface GenerateLLMsTextParams { + /** + * Maximum number of URLs to process (1-100) + * @default 10 + */ + maxUrls?: number + /** + * Whether to show the full LLMs-full.txt in the response + * @default false + */ + showFullText?: boolean +} + +/** + * Response interface for LLMs.txt generation operations. + */ +export interface GenerateLLMsTextResponse { + success: boolean + id: string +} + +/** + * Status response interface for LLMs.txt generation operations. + */ +export interface GenerateLLMsTextStatusResponse { + success: boolean + data: { + llmstxt: string + llmsfulltxt?: string + } + status: 'processing' | 'completed' | 'failed' + error?: string + expiresAt: string +} export namespace firecrawl { export const BASE_URL = 'https://api.firecrawl.dev' @@ -21,79 +401,6 @@ export namespace firecrawl { interval: 1200, strict: true }) - - /** - * Generic parameter interface. - */ - export interface Params { - extractorOptions?: { - extractionSchema: z.ZodSchema | any - mode?: 'llm-extraction' - extractionPrompt?: string - } - } - - /** - * Response interface for scraping operations. - */ - export interface ScrapeResponse { - success: boolean - data?: Data - error?: string - } - - export interface Data { - content?: string - markdown?: string - html?: string - metadata: Metadata - } - - export interface Metadata { - title: string - description: string - keywords?: string - robots?: string - ogTitle?: string - ogDescription?: string - ogUrl?: string - ogImage?: string - ogLocaleAlternate?: any[] - ogSiteName?: string - sourceURL?: string - modifiedTime?: string - publishedTime?: string - } - - /** - * Response interface for searching operations. - */ - export interface SearchResponse { - success: boolean - data?: any - error?: string - } - - /** - * Response interface for crawling operations. - */ - export interface CrawlResponse { - success: boolean - jobId?: string - data?: any - error?: string - } - - /** - * Response interface for job status checks. - */ - export interface JobStatusResponse { - success: boolean - status: string - jobId?: string - data?: any - error?: string - } } /** @@ -140,124 +447,363 @@ export class FirecrawlClient extends AIFunctionsProvider { prefixUrl: apiBaseUrl, timeout: timeoutMs, headers: { - Authorization: `Bearer ${this.apiKey}` + Authorization: `Bearer ${this.apiKey}`, + 'X-Origin': 'agentic', + 'X-Origin-Type': 'integration' } }) } /** - * Scrape the contents of a URL. + * Sends a POST request. */ - @aiFunction({ - name: 'firecrawl_scrape_url', - description: 'Scrape the contents of a URL.', - inputSchema: z.object({ - url: z.string().url().describe('The URL to scrape.') - }) - }) - async scrapeUrl( - opts: { - url: string - } & firecrawl.Params - ) { - const json = { - ...opts + private async postRequest(path: string, data: any): Promise { + try { + const response = await this.ky.post(path, { json: data }) + return await response.json() + } catch (err) { + if (err instanceof Error) { + const response = await (err as any).response?.json() + if (response?.error) { + throw new FirecrawlError( + `Request failed. Error: ${response.error}`, + (err as any).response?.status ?? 500, + response?.details + ) + } + } + throw err } + } - if (opts?.extractorOptions?.extractionSchema) { - let schema = opts.extractorOptions.extractionSchema - if (isZodSchema(schema)) { + /** + * Sends a GET request. + */ + private async getRequest(path: string): Promise { + try { + const response = await this.ky.get(path) + return await response.json() + } catch (err) { + if (err instanceof Error) { + const response = await (err as any).response?.json() + if (response?.error) { + throw new FirecrawlError( + `Request failed. Error: ${response.error}`, + (err as any).response?.status ?? 500, + response?.details + ) + } + } + throw err + } + } + + /** + * Sends a DELETE request. + */ + private async deleteRequest(path: string): Promise { + try { + const response = await this.ky.delete(path) + return await response.json() + } catch (err) { + if (err instanceof Error) { + const response = await (err as any).response?.json() + if (response?.error) { + throw new FirecrawlError( + `Request failed. Error: ${response.error}`, + (err as any).response?.status ?? 500, + response?.details + ) + } + } + throw err + } + } + + /** + * Scrapes a URL using the Firecrawl API. + */ + async scrapeUrl< + T extends z.ZodSchema, + ActionsSchema extends Action[] | undefined = undefined + >( + url: string, + params?: ScrapeParams + ): Promise< + | ScrapeResponse< + z.infer, + ActionsSchema extends Action[] ? ActionsResult : never + > + | ErrorResponse + > { + let jsonData: any = { url, ...params } + + if (jsonData?.extract?.schema) { + let schema = jsonData.extract.schema + try { schema = zodToJsonSchema(schema) - } - - json.extractorOptions = { - mode: 'llm-extraction', - ...opts.extractorOptions, - extractionSchema: schema + } catch {} + jsonData = { + ...jsonData, + extract: { + ...jsonData.extract, + schema + } } } - return this.ky.post('v0/scrape', { json }).json() - } - - async search( - opts: { - query: string - } & firecrawl.Params - ) { - return this.ky - .post('v0/search', { json: opts }) - .json() - } - - async crawlUrl({ - waitUntilDone = true, - timeoutMs = 30_000, - idempotencyKey, - ...params - }: { - url: string - waitUntilDone?: boolean - timeoutMs?: number - idempotencyKey?: string - } & firecrawl.Params) { - const res = await this.ky - .post('v0/crawl', { - json: params, - timeout: timeoutMs, - headers: idempotencyKey - ? { - 'x-idempotency-key': idempotencyKey - } - : undefined - }) - .json() - - assert(res.jobId) - if (waitUntilDone) { - return this.waitForCrawlJob({ jobId: res.jobId, timeoutMs }) + if (jsonData?.jsonOptions?.schema) { + let schema = jsonData.jsonOptions.schema + try { + schema = zodToJsonSchema(schema) + } catch {} + jsonData = { + ...jsonData, + jsonOptions: { + ...jsonData.jsonOptions, + schema + } + } } - return res + try { + const response = await this.postRequest('v1/scrape', jsonData) + return response + } catch (err) { + if (err instanceof FirecrawlError) { + throw err + } + throw new FirecrawlError( + err instanceof Error ? err.message : 'Unknown error', + 500 + ) + } } - async checkCrawlStatus(jobId: string) { - assert(jobId) + /** + * Searches using the Firecrawl API. + */ + async search(query: string, params?: SearchParams): Promise { + const jsonData = { + query, + limit: params?.limit ?? 5, + tbs: params?.tbs, + filter: params?.filter, + lang: params?.lang ?? 'en', + country: params?.country ?? 'us', + location: params?.location, + origin: params?.origin ?? 'api', + timeout: params?.timeout ?? 60_000, + scrapeOptions: params?.scrapeOptions ?? { formats: [] } + } - return this.ky - .get(`v0/crawl/status/${jobId}`) - .json() + try { + const response = await this.postRequest('v1/search', jsonData) + if (response.success) { + return { + success: true, + data: response.data as FirecrawlDocument[], + warning: response.warning + } + } else { + throw new FirecrawlError( + `Failed to search. Error: ${response.error}`, + 500 + ) + } + } catch (err: any) { + if (err.response?.data?.error) { + throw new FirecrawlError( + `Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`, + err.response.status + ) + } else { + throw new FirecrawlError(err.message, 500) + } + } + return { success: false, error: 'Internal server error.', data: [] } } - async waitForCrawlJob({ - jobId, - timeoutMs = 60_000 - }: { - jobId: string - timeoutMs?: number - }) { - assert(jobId) + /** + * Initiates a crawl job for a URL. + */ + async crawlUrl( + url: string, + params?: CrawlParams + ): Promise { + const jsonData = { url, ...params } - const start = Date.now() - do { - const res = await this.checkCrawlStatus(jobId) - if (res.status === 'completed') { - return res - } - - if (!['active', 'paused', 'pending', 'queued'].includes(res.status)) { - throw new Error( - `Crawl job "${jobId}" failed or was stopped. Status: ${res.status}` + try { + const response = await this.postRequest('v1/crawl', jsonData) + if (response.success) { + return response + } else { + throw new FirecrawlError( + `Failed to start crawl job. Error: ${response.error}`, + 500 ) } + } catch (err: any) { + if (err.response?.data?.error) { + throw new FirecrawlError( + `Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`, + err.response.status + ) + } else { + throw new FirecrawlError(err.message, 500) + } + } + return { success: false, error: 'Internal server error.' } + } - if (Date.now() - start > timeoutMs) { - throw new Error( - `Timeout waiting for crawl job "${jobId}" to complete: ${res.status}` + /** + * Checks the status of a crawl job. + */ + async checkCrawlStatus( + id: string + ): Promise { + if (!id) { + throw new FirecrawlError('No crawl ID provided', 400) + } + + try { + const response = await this.getRequest(`v1/crawl/${id}`) + if (response.success) { + return response + } else { + throw new FirecrawlError( + `Failed to check crawl status. Error: ${response.error}`, + 500 ) } + } catch (err: any) { + throw new FirecrawlError(err.message, 500) + } + } - await delay(1000) - } while (true) + /** + * Returns information about crawl errors. + */ + async checkCrawlErrors( + id: string + ): Promise { + try { + const response = await this.getRequest(`v1/crawl/${id}/errors`) + if (response.errors) { + return response + } else { + throw new FirecrawlError( + `Failed to check crawl errors. Error: ${response.error}`, + 500 + ) + } + } catch (err: any) { + throw new FirecrawlError(err.message, 500) + } + } + + /** + * Cancels a crawl job. + */ + async cancelCrawl(id: string): Promise { + try { + const response = await this.deleteRequest(`v1/crawl/${id}`) + if (response.status) { + return response + } else { + throw new FirecrawlError( + `Failed to cancel crawl job. Error: ${response.error}`, + 500 + ) + } + } catch (err: any) { + throw new FirecrawlError(err.message, 500) + } + } + + /** + * Extracts structured data from URLs using LLMs. + * @param urls - Array of URLs to extract data from + * @param params - Additional parameters for the extract request + * @returns The response from the extract operation + */ + async extract( + urls: string[], + params: ExtractParams + ): Promise>> { + const jsonData = { + urls, + ...params, + schema: params.schema ? zodToJsonSchema(params.schema) : undefined + } + + try { + const response = await this.postRequest('v1/extract', jsonData) + if (!response.success) { + throw new FirecrawlError( + response.error || 'Extract operation failed', + 500 + ) + } + return response + } catch (err) { + if (err instanceof FirecrawlError) { + throw err + } + throw new FirecrawlError( + err instanceof Error ? err.message : 'Unknown error', + 500 + ) + } + } + + /** + * Checks the status of an extract operation. + */ + async checkExtractStatus( + id: string + ): Promise> { + if (!id) { + throw new FirecrawlError('No extract ID provided', 400) + } + + try { + const response = await this.getRequest(`v1/extract/${id}`) + return response + } catch (err) { + if (err instanceof FirecrawlError) { + throw err + } + throw new FirecrawlError( + err instanceof Error ? err.message : 'Unknown error', + 500 + ) + } + } + + /** + * Generates LLMs.txt for a given URL. + */ + async generateLLMsText( + url: string, + params: GenerateLLMsTextParams + ): Promise { + const jsonData = { + url, + ...params + } + + try { + const response = await this.postRequest('v1/llmstxt', jsonData) + return response + } catch (err) { + if (err instanceof FirecrawlError) { + throw err + } + throw new FirecrawlError( + err instanceof Error ? err.message : 'Unknown error', + 500 + ) + } } } diff --git a/packages/firecrawl/src/index.ts b/packages/firecrawl/src/index.ts index 9390f89..22f774a 100644 --- a/packages/firecrawl/src/index.ts +++ b/packages/firecrawl/src/index.ts @@ -1 +1,13 @@ -export * from './firecrawl-client' +export type { + ErrorResponse, + ExtractParams, + ExtractResponse, + ExtractStatusResponse, + FirecrawlClientConfig, + GenerateLLMsTextParams, + GenerateLLMsTextResponse, + GenerateLLMsTextStatusResponse, + ScrapeParams, + ScrapeResponse +} from './firecrawl-client.js' +export { FirecrawlClient } from './firecrawl-client.js' diff --git a/packages/firecrawl/test-firecrawl-client.ts b/packages/firecrawl/test-firecrawl-client.ts new file mode 100644 index 0000000..328d93f --- /dev/null +++ b/packages/firecrawl/test-firecrawl-client.ts @@ -0,0 +1,194 @@ +import { z } from 'zod' + +import { FirecrawlClient } from './dist/index.mjs' + +// Initialize the client with the API key +const apiKey = 'FIRECRAWL-API-KEY' +const firecrawl = new FirecrawlClient({ apiKey }) + +// ============================================= +// Test 1: URL Scraping +// ============================================= +async function testUrlScraping() { + console.log('πŸ” Testing URL scraping...') + try { + const result = await firecrawl.scrapeUrl('https://mairistumpf.com') + console.log('βœ… URL scraping successful!') + console.log('Result:', result) + } catch (err) { + console.error('❌ URL scraping failed:', err) + } +} + +// ============================================= +// Test 2: Search +// ============================================= +async function testSearch() { + console.log('\nπŸ” Testing search...') + try { + const result = await firecrawl.search('artificial intelligence news', { + limit: 5, + lang: 'en', + country: 'us' + }) + console.log('βœ… Search successful!') + console.log('Results:', result.data) + console.log('Results:', result.data.length) + } catch (err) { + console.error('❌ Search failed:', err) + } +} + +// ============================================= +// Test 3: Crawl URL +// ============================================= +async function testCrawlUrl() { + console.log('\nπŸ” Testing URL crawling...') + try { + const result = await firecrawl.crawlUrl('https://example.com', { + maxDepth: 2, + limit: 5 + }) + console.log('βœ… Crawl initiated successfully!') + console.log('Result:', result) + + if (result.success && result.id) { + // Test crawl status + console.log('\nπŸ” Testing crawl status...') + const statusResult = await firecrawl.checkCrawlStatus(result.id) + console.log('βœ… Crawl status check successful!') + console.log('Status:', statusResult) + + // Test crawl errors + console.log('\nπŸ” Testing crawl errors...') + const errorsResult = await firecrawl.checkCrawlErrors(result.id) + console.log('βœ… Crawl errors check successful!') + console.log('Errors:', errorsResult) + + // Test crawl cancellation + console.log('\nπŸ” Testing crawl cancellation...') + const cancelResult = await firecrawl.cancelCrawl(result.id) + console.log('βœ… Crawl cancellation successful!') + console.log('Result:', cancelResult) + } + } catch (err) { + console.error('❌ Crawl operations failed:', err) + } +} + +// ============================================= +// Test 4: Extract +// ============================================= +async function testExtract() { + console.log('\nπŸ” Testing extract...') + try { + const result = await firecrawl.extract(['https://firecrawl.dev'], { + prompt: 'Extract the pricing information from the website', + schema: z.object({ + pricing: z.object({ + free: z.object({ + price: z.number(), + features: z.array(z.string()) + }), + pro: z.object({ + price: z.number(), + features: z.array(z.string()) + }) + }) + }), + enableWebSearch: false, + ignoreSitemap: false, + includeSubdomains: true, + showSources: false, + scrapeOptions: { + formats: ['markdown'], + onlyMainContent: true, + blockAds: true, + proxy: 'basic', + location: { + country: 'US', + languages: ['en-US'] + } + } + }) + console.log('βœ… Extract successful!') + console.log('Result:', result) + + if (result.success && result.id) { + // Test extract status + console.log('\nπŸ” Testing extract status...') + const statusResult = await firecrawl.checkExtractStatus(result.id) + console.log('βœ… Extract status check successful!') + console.log('Status:', statusResult) + } + } catch (err) { + console.error('❌ Extract failed:', err) + } +} + +async function testExtractUntilCompletion() { + console.log('\nπŸ” Testing extract...') + try { + const result = await firecrawl.extract(['https://firecrawl.dev'], { + prompt: 'Extract the pricing information from the website', + schema: z.object({ + pricing: z.object({ + free: z.object({ + price: z.number(), + features: z.array(z.string()) + }), + pro: z.object({ + price: z.number(), + features: z.array(z.string()) + }) + }) + }), + enableWebSearch: false, + ignoreSitemap: false, + includeSubdomains: true, + showSources: false, + scrapeOptions: { + formats: ['markdown'], + onlyMainContent: true, + blockAds: true, + proxy: 'basic', + location: { + country: 'US', + languages: ['en-US'] + } + } + }) + console.log('βœ… Extract successful!') + console.log('Result:', result) + + if (result.success && result.id) { + // Test extract status + console.log('\nπŸ” Testing extract status...') + let statusResult = await firecrawl.checkExtractStatus(result.id) + + while (statusResult.status === 'processing') { + // wait 5 seconds and check again + await new Promise((resolve) => setTimeout(resolve, 5000)) + statusResult = await firecrawl.checkExtractStatus(result.id) + } + console.log('βœ… Extract status check successful!') + console.log('Status:', statusResult) + } + } catch (err) { + console.error('❌ Extract failed:', err) + } +} + +// ============================================= +// Run all tests +// ============================================= +console.log('πŸš€ Starting FirecrawlClient tests...\n') + +// Run tests sequentially +await testUrlScraping() +await testSearch() +await testCrawlUrl() +await testExtract() +await testExtractUntilCompletion() + +console.log('\n🏁 All tests completed!')