fix: firecrawl updates from last pr

pull/697/head
Travis Fischer 2025-03-18 21:02:16 +08:00
rodzic 5efa64bb4f
commit 0ec220446d
4 zmienionych plików z 489 dodań i 457 usunięć

Wyświetl plik

@ -11,15 +11,23 @@
"type": "git",
"url": "git+https://github.com/transitive-bullshit/agentic.git"
},
"sideEffects": false,
"main": "./dist/index.js",
"module": "./dist/index.mjs",
"type": "module",
"source": "./src/index.ts",
"types": "./dist/index.d.ts",
"sideEffects": false,
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"default": "./dist/index.js"
}
},
"files": [
"dist/**"
"dist"
],
"scripts": {
"build": "tsup",
"build": "tsup --config ../../tsup.config.ts",
"dev": "tsup --config ../../tsup.config.ts --watch",
"clean": "del dist",
"test": "run-s test:*",
"test:lint": "eslint .",
@ -34,10 +42,7 @@
"zod": "catalog:"
},
"devDependencies": {
"@agentic/tsconfig": "workspace:*",
"@types/node": "^20.11.16",
"tsup": "^8.0.1",
"typescript": "^5.3.3"
"@agentic/tsconfig": "workspace:*"
},
"publishConfig": {
"access": "public"

Wyświetl plik

@ -1,6 +1,6 @@
import { z } from 'zod'
import { FirecrawlClient } from './dist/index.mjs'
import { FirecrawlClient } from './firecrawl-client'
// Initialize the client with the API key
const apiKey = 'FIRECRAWL-API-KEY'
@ -26,7 +26,8 @@ async function testUrlScraping() {
async function testSearch() {
console.log('\n🔍 Testing search...')
try {
const result = await firecrawl.search('artificial intelligence news', {
const result = await firecrawl.search({
query: 'artificial intelligence news',
limit: 5,
lang: 'en',
country: 'us'
@ -45,7 +46,8 @@ async function testSearch() {
async function testCrawlUrl() {
console.log('\n🔍 Testing URL crawling...')
try {
const result = await firecrawl.crawlUrl('https://example.com', {
const result = await firecrawl.crawlUrl({
url: 'https://example.com',
maxDepth: 2,
limit: 5
})

Wyświetl plik

@ -1,5 +1,6 @@
// import type * as z from 'zod'
import {
aiFunction,
AIFunctionsProvider,
assert,
getEnv,
@ -8,20 +9,30 @@ import {
} from '@agentic/core'
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { type z } from 'zod'
import { z } from 'zod'
/**
export namespace firecrawl {
export const BASE_URL = 'https://api.firecrawl.dev'
// Allow up to 50 requests per minute by default.
export const throttle = pThrottle({
limit: 1,
interval: 1200,
strict: true
})
/**
* Configuration interface for FirecrawlClient.
*/
export interface FirecrawlClientConfig {
export interface ClientConfig {
apiKey?: string
apiBaseUrl?: string
}
}
/**
/**
* Metadata for a Firecrawl document.
*/
export interface FirecrawlDocumentMetadata {
export interface DocumentMetadata {
title?: string
description?: string
language?: string
@ -55,15 +66,15 @@ export interface FirecrawlDocumentMetadata {
statusCode?: number
error?: string
[key: string]: any
}
}
/**
/**
* Document interface for Firecrawl.
*/
export interface FirecrawlDocument<
export interface Document<
T = any,
ActionsSchema extends ActionsResult | never = never
> {
> {
url?: string
markdown?: string
html?: string
@ -72,17 +83,17 @@ export interface FirecrawlDocument<
extract?: T
json?: T
screenshot?: string
metadata?: FirecrawlDocumentMetadata
metadata?: DocumentMetadata
actions: ActionsSchema
title?: string
description?: string
}
}
/**
/**
* Parameters for scraping operations.
* Defines the options and configurations available for scraping web content.
*/
export interface ScrapeOptions {
export interface ScrapeOptions {
formats?: (
| 'markdown'
| 'html'
@ -109,15 +120,15 @@ export interface ScrapeOptions {
removeBase64Images?: boolean
blockAds?: boolean
proxy?: 'basic' | 'stealth'
}
}
/**
/**
* Parameters for scraping operations.
*/
export interface ScrapeParams<
export interface ScrapeParams<
LLMSchema extends z.ZodSchema = any,
ActionsSchema extends Action[] | undefined = undefined
> {
> {
formats?: (
| 'markdown'
| 'html'
@ -155,9 +166,9 @@ export interface ScrapeParams<
systemPrompt?: string
}
actions?: ActionsSchema
}
}
export type Action =
export type Action =
| {
type: 'wait'
milliseconds?: number
@ -192,26 +203,26 @@ export type Action =
script: string
}
export interface ActionsResult {
export interface ActionsResult {
screenshots: string[]
}
}
/**
/**
* Response interface for scraping operations.
*/
export interface ScrapeResponse<
export interface ScrapeResponse<
LLMResult = any,
ActionsSchema extends ActionsResult | never = never
> extends FirecrawlDocument<LLMResult, ActionsSchema> {
> extends Document<LLMResult, ActionsSchema> {
success: true
warning?: string
error?: string
}
}
/**
/**
* Parameters for search operations.
*/
export interface SearchParams {
export interface SearchParams {
limit?: number
tbs?: string
filter?: string
@ -221,22 +232,22 @@ export interface SearchParams {
origin?: string
timeout?: number
scrapeOptions?: ScrapeParams
}
}
/**
/**
* Response interface for search operations.
*/
export interface SearchResponse {
export interface SearchResponse {
success: boolean
data: FirecrawlDocument<undefined>[]
data: Document[]
warning?: string
error?: string
}
}
/**
/**
* Parameters for crawling operations.
*/
export interface CrawlParams {
export interface CrawlParams {
includePaths?: string[]
excludePaths?: string[]
maxDepth?: number
@ -257,22 +268,22 @@ export interface CrawlParams {
deduplicateSimilarURLs?: boolean
ignoreQueryParameters?: boolean
regexOnFullURL?: boolean
}
}
/**
/**
* Response interface for crawling operations.
*/
export interface CrawlResponse {
export interface CrawlResponse {
id?: string
url?: string
success: true
error?: string
}
}
/**
/**
* Response interface for job status checks.
*/
export interface CrawlStatusResponse {
export interface CrawlStatusResponse {
success: true
status: 'scraping' | 'completed' | 'failed' | 'cancelled'
completed: number
@ -280,13 +291,13 @@ export interface CrawlStatusResponse {
creditsUsed: number
expiresAt: Date
next?: string
data: FirecrawlDocument<undefined>[]
}
data: Document[]
}
/**
/**
* Response interface for crawl errors.
*/
export interface CrawlErrorsResponse {
export interface CrawlErrorsResponse {
errors: {
id: string
timestamp?: string
@ -294,20 +305,20 @@ export interface CrawlErrorsResponse {
error: string
}[]
robotsBlocked: string[]
}
}
/**
/**
* Error response interface.
*/
export interface ErrorResponse {
export interface ErrorResponse {
success: false
error: string
}
}
/**
/**
* Custom error class for Firecrawl.
*/
export class FirecrawlError extends Error {
export class FirecrawlError extends Error {
statusCode: number
details?: any
@ -316,12 +327,12 @@ export class FirecrawlError extends Error {
this.statusCode = statusCode
this.details = details
}
}
}
/**
/**
* Parameters for extracting information from URLs.
*/
export interface ExtractParams<T extends z.ZodSchema = any> {
export interface ExtractParams<T extends z.ZodSchema = any> {
prompt: string
schema?: T
enableWebSearch?: boolean
@ -329,35 +340,36 @@ export interface ExtractParams<T extends z.ZodSchema = any> {
includeSubdomains?: boolean
showSources?: boolean
scrapeOptions?: ScrapeOptions
}
}
/**
/**
* Response interface for extracting information from URLs.
* Defines the structure of the response received after extracting information from URLs.
*/
export interface ExtractResponse<T = any> {
export interface ExtractResponse<T = any> {
success: boolean
id?: string
data: T
error?: string
warning?: string
sources?: string[]
}
}
/**
/**
* Response interface for extract status operations.
*/
export interface ExtractStatusResponse<T = any> {
export interface ExtractStatusResponse<T = any> {
success: boolean
status: 'processing' | 'completed' | 'failed'
data?: T
error?: string
expiresAt?: string
}
/**
}
/**
* Parameters for LLMs.txt generation operations.
*/
export interface GenerateLLMsTextParams {
export interface GenerateLLMsTextParams {
/**
* Maximum number of URLs to process (1-100)
* @default 10
@ -368,20 +380,20 @@ export interface GenerateLLMsTextParams {
* @default false
*/
showFullText?: boolean
}
}
/**
/**
* Response interface for LLMs.txt generation operations.
*/
export interface GenerateLLMsTextResponse {
export interface GenerateLLMsTextResponse {
success: boolean
id: string
}
}
/**
/**
* Status response interface for LLMs.txt generation operations.
*/
export interface GenerateLLMsTextStatusResponse {
export interface GenerateLLMsTextStatusResponse {
success: boolean
data: {
llmstxt: string
@ -390,17 +402,7 @@ export interface GenerateLLMsTextStatusResponse {
status: 'processing' | 'completed' | 'failed'
error?: string
expiresAt: string
}
export namespace firecrawl {
export const BASE_URL = 'https://api.firecrawl.dev'
// Allow up to 50 requests per minute by default.
export const throttle = pThrottle({
limit: 1,
interval: 1200,
strict: true
})
}
}
/**
@ -457,7 +459,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
/**
* Sends a POST request.
*/
private async postRequest(path: string, data: any): Promise<any> {
protected async postRequest(path: string, data: any): Promise<any> {
try {
const response = await this.ky.post(path, { json: data })
return await response.json()
@ -465,7 +467,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
if (err instanceof Error) {
const response = await (err as any).response?.json()
if (response?.error) {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Request failed. Error: ${response.error}`,
(err as any).response?.status ?? 500,
response?.details
@ -479,7 +481,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
/**
* Sends a GET request.
*/
private async getRequest(path: string): Promise<any> {
protected async getRequest(path: string): Promise<any> {
try {
const response = await this.ky.get(path)
return await response.json()
@ -487,7 +489,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
if (err instanceof Error) {
const response = await (err as any).response?.json()
if (response?.error) {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Request failed. Error: ${response.error}`,
(err as any).response?.status ?? 500,
response?.details
@ -501,7 +503,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
/**
* Sends a DELETE request.
*/
private async deleteRequest(path: string): Promise<any> {
protected async deleteRequest(path: string): Promise<any> {
try {
const response = await this.ky.delete(path)
return await response.json()
@ -509,7 +511,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
if (err instanceof Error) {
const response = await (err as any).response?.json()
if (response?.error) {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Request failed. Error: ${response.error}`,
(err as any).response?.status ?? 500,
response?.details
@ -521,21 +523,33 @@ export class FirecrawlClient extends AIFunctionsProvider {
}
/**
* Scrapes a URL using the Firecrawl API.
* Scrape the contents of a URL.
*/
@aiFunction({
name: 'firecrawl_scrape_url',
description: 'Scrape the contents of a URL.',
inputSchema: z.object({
url: z.string().url().describe('The URL to scrape.')
})
})
async scrapeUrl<
T extends z.ZodSchema,
ActionsSchema extends Action[] | undefined = undefined
ActionsSchema extends firecrawl.Action[] | undefined = undefined
>(
url: string,
params?: ScrapeParams<T, ActionsSchema>
orlOrOpts:
| string
| ({ url: string } & firecrawl.ScrapeParams<T, ActionsSchema>)
): Promise<
| ScrapeResponse<
| firecrawl.ScrapeResponse<
z.infer<T>,
ActionsSchema extends Action[] ? ActionsResult : never
ActionsSchema extends firecrawl.Action[]
? firecrawl.ActionsResult
: never
>
| ErrorResponse
| firecrawl.ErrorResponse
> {
const { url, ...params } =
typeof orlOrOpts === 'string' ? { url: orlOrOpts } : orlOrOpts
let jsonData: any = { url, ...params }
if (jsonData?.extract?.schema) {
@ -570,10 +584,10 @@ export class FirecrawlClient extends AIFunctionsProvider {
const response = await this.postRequest('v1/scrape', jsonData)
return response
} catch (err) {
if (err instanceof FirecrawlError) {
if (err instanceof firecrawl.FirecrawlError) {
throw err
}
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
err instanceof Error ? err.message : 'Unknown error',
500
)
@ -583,7 +597,19 @@ export class FirecrawlClient extends AIFunctionsProvider {
/**
* Searches using the Firecrawl API.
*/
async search(query: string, params?: SearchParams): Promise<SearchResponse> {
@aiFunction({
name: 'firecrawl_search',
description: 'Searches the internet for the given query.',
inputSchema: z.object({
query: z.string().describe('Search query.')
})
})
async search(
queryOrOpts: string | ({ query: string } & firecrawl.SearchParams)
): Promise<firecrawl.SearchResponse> {
const { query, ...params } =
typeof queryOrOpts === 'string' ? { query: queryOrOpts } : queryOrOpts
const jsonData = {
query,
limit: params?.limit ?? 5,
@ -602,23 +628,23 @@ export class FirecrawlClient extends AIFunctionsProvider {
if (response.success) {
return {
success: true,
data: response.data as FirecrawlDocument<any>[],
data: response.data as firecrawl.Document[],
warning: response.warning
}
} else {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Failed to search. Error: ${response.error}`,
500
)
}
} catch (err: any) {
if (err.response?.data?.error) {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
err.response.status
)
} else {
throw new FirecrawlError(err.message, 500)
throw new firecrawl.FirecrawlError(err.message, 500)
}
}
return { success: false, error: 'Internal server error.', data: [] }
@ -627,10 +653,18 @@ export class FirecrawlClient extends AIFunctionsProvider {
/**
* Initiates a crawl job for a URL.
*/
@aiFunction({
name: 'firecrawl_crawl_url',
description: 'Initiates a crawl job for a URL.',
inputSchema: z.object({
url: z.string().url().describe('The URL to crawl.')
})
})
async crawlUrl(
url: string,
params?: CrawlParams
): Promise<CrawlResponse | ErrorResponse> {
urlOrOpts: string | ({ url: string } & firecrawl.CrawlParams)
): Promise<firecrawl.CrawlResponse | firecrawl.ErrorResponse> {
const { url, ...params } =
typeof urlOrOpts === 'string' ? { url: urlOrOpts } : urlOrOpts
const jsonData = { url, ...params }
try {
@ -638,19 +672,19 @@ export class FirecrawlClient extends AIFunctionsProvider {
if (response.success) {
return response
} else {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Failed to start crawl job. Error: ${response.error}`,
500
)
}
} catch (err: any) {
if (err.response?.data?.error) {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
err.response.status
)
} else {
throw new FirecrawlError(err.message, 500)
throw new firecrawl.FirecrawlError(err.message, 500)
}
}
return { success: false, error: 'Internal server error.' }
@ -661,9 +695,9 @@ export class FirecrawlClient extends AIFunctionsProvider {
*/
async checkCrawlStatus(
id: string
): Promise<CrawlStatusResponse | ErrorResponse> {
): Promise<firecrawl.CrawlStatusResponse | firecrawl.ErrorResponse> {
if (!id) {
throw new FirecrawlError('No crawl ID provided', 400)
throw new firecrawl.FirecrawlError('No crawl ID provided', 400)
}
try {
@ -671,13 +705,13 @@ export class FirecrawlClient extends AIFunctionsProvider {
if (response.success) {
return response
} else {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Failed to check crawl status. Error: ${response.error}`,
500
)
}
} catch (err: any) {
throw new FirecrawlError(err.message, 500)
throw new firecrawl.FirecrawlError(err.message, 500)
}
}
@ -686,51 +720,52 @@ export class FirecrawlClient extends AIFunctionsProvider {
*/
async checkCrawlErrors(
id: string
): Promise<CrawlErrorsResponse | ErrorResponse> {
): Promise<firecrawl.CrawlErrorsResponse | firecrawl.ErrorResponse> {
try {
const response = await this.getRequest(`v1/crawl/${id}/errors`)
if (response.errors) {
return response
} else {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Failed to check crawl errors. Error: ${response.error}`,
500
)
}
} catch (err: any) {
throw new FirecrawlError(err.message, 500)
throw new firecrawl.FirecrawlError(err.message, 500)
}
}
/**
* Cancels a crawl job.
*/
async cancelCrawl(id: string): Promise<ErrorResponse> {
async cancelCrawl(id: string): Promise<firecrawl.ErrorResponse> {
try {
const response = await this.deleteRequest(`v1/crawl/${id}`)
if (response.status) {
return response
} else {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
`Failed to cancel crawl job. Error: ${response.error}`,
500
)
}
} catch (err: any) {
throw new FirecrawlError(err.message, 500)
throw new firecrawl.FirecrawlError(err.message, 500)
}
}
/**
* Extracts structured data from URLs using LLMs.
*
* @param urls - Array of URLs to extract data from
* @param params - Additional parameters for the extract request
* @returns The response from the extract operation
*/
async extract<T extends z.ZodSchema>(
urls: string[],
params: ExtractParams<T>
): Promise<ExtractResponse<z.infer<T>>> {
params: firecrawl.ExtractParams<T>
): Promise<firecrawl.ExtractResponse<z.infer<T>>> {
const jsonData = {
urls,
...params,
@ -740,17 +775,17 @@ export class FirecrawlClient extends AIFunctionsProvider {
try {
const response = await this.postRequest('v1/extract', jsonData)
if (!response.success) {
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
response.error || 'Extract operation failed',
500
)
}
return response
} catch (err) {
if (err instanceof FirecrawlError) {
if (err instanceof firecrawl.FirecrawlError) {
throw err
}
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
err instanceof Error ? err.message : 'Unknown error',
500
)
@ -762,19 +797,19 @@ export class FirecrawlClient extends AIFunctionsProvider {
*/
async checkExtractStatus<T = any>(
id: string
): Promise<ExtractStatusResponse<T>> {
): Promise<firecrawl.ExtractStatusResponse<T>> {
if (!id) {
throw new FirecrawlError('No extract ID provided', 400)
throw new firecrawl.FirecrawlError('No extract ID provided', 400)
}
try {
const response = await this.getRequest(`v1/extract/${id}`)
return response
} catch (err) {
if (err instanceof FirecrawlError) {
if (err instanceof firecrawl.FirecrawlError) {
throw err
}
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
err instanceof Error ? err.message : 'Unknown error',
500
)
@ -786,8 +821,10 @@ export class FirecrawlClient extends AIFunctionsProvider {
*/
async generateLLMsText(
url: string,
params: GenerateLLMsTextParams
): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
params?: firecrawl.GenerateLLMsTextParams
): Promise<
firecrawl.GenerateLLMsTextStatusResponse | firecrawl.ErrorResponse
> {
const jsonData = {
url,
...params
@ -797,10 +834,10 @@ export class FirecrawlClient extends AIFunctionsProvider {
const response = await this.postRequest('v1/llmstxt', jsonData)
return response
} catch (err) {
if (err instanceof FirecrawlError) {
if (err instanceof firecrawl.FirecrawlError) {
throw err
}
throw new FirecrawlError(
throw new firecrawl.FirecrawlError(
err instanceof Error ? err.message : 'Unknown error',
500
)

Wyświetl plik

@ -1,13 +1 @@
export type {
ErrorResponse,
ExtractParams,
ExtractResponse,
ExtractStatusResponse,
FirecrawlClientConfig,
GenerateLLMsTextParams,
GenerateLLMsTextResponse,
GenerateLLMsTextStatusResponse,
ScrapeParams,
ScrapeResponse
} from './firecrawl-client.js'
export { FirecrawlClient } from './firecrawl-client.js'
export * from './firecrawl-client'