kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
fix: firecrawl updates from last pr
rodzic
5efa64bb4f
commit
0ec220446d
|
@ -11,15 +11,23 @@
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "git+https://github.com/transitive-bullshit/agentic.git"
|
"url": "git+https://github.com/transitive-bullshit/agentic.git"
|
||||||
},
|
},
|
||||||
"sideEffects": false,
|
"type": "module",
|
||||||
"main": "./dist/index.js",
|
"source": "./src/index.ts",
|
||||||
"module": "./dist/index.mjs",
|
|
||||||
"types": "./dist/index.d.ts",
|
"types": "./dist/index.d.ts",
|
||||||
|
"sideEffects": false,
|
||||||
|
"exports": {
|
||||||
|
".": {
|
||||||
|
"types": "./dist/index.d.ts",
|
||||||
|
"import": "./dist/index.js",
|
||||||
|
"default": "./dist/index.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
"files": [
|
"files": [
|
||||||
"dist/**"
|
"dist"
|
||||||
],
|
],
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"build": "tsup",
|
"build": "tsup --config ../../tsup.config.ts",
|
||||||
|
"dev": "tsup --config ../../tsup.config.ts --watch",
|
||||||
"clean": "del dist",
|
"clean": "del dist",
|
||||||
"test": "run-s test:*",
|
"test": "run-s test:*",
|
||||||
"test:lint": "eslint .",
|
"test:lint": "eslint .",
|
||||||
|
@ -34,10 +42,7 @@
|
||||||
"zod": "catalog:"
|
"zod": "catalog:"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@agentic/tsconfig": "workspace:*",
|
"@agentic/tsconfig": "workspace:*"
|
||||||
"@types/node": "^20.11.16",
|
|
||||||
"tsup": "^8.0.1",
|
|
||||||
"typescript": "^5.3.3"
|
|
||||||
},
|
},
|
||||||
"publishConfig": {
|
"publishConfig": {
|
||||||
"access": "public"
|
"access": "public"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
|
|
||||||
import { FirecrawlClient } from './dist/index.mjs'
|
import { FirecrawlClient } from './firecrawl-client'
|
||||||
|
|
||||||
// Initialize the client with the API key
|
// Initialize the client with the API key
|
||||||
const apiKey = 'FIRECRAWL-API-KEY'
|
const apiKey = 'FIRECRAWL-API-KEY'
|
||||||
|
@ -26,7 +26,8 @@ async function testUrlScraping() {
|
||||||
async function testSearch() {
|
async function testSearch() {
|
||||||
console.log('\n🔍 Testing search...')
|
console.log('\n🔍 Testing search...')
|
||||||
try {
|
try {
|
||||||
const result = await firecrawl.search('artificial intelligence news', {
|
const result = await firecrawl.search({
|
||||||
|
query: 'artificial intelligence news',
|
||||||
limit: 5,
|
limit: 5,
|
||||||
lang: 'en',
|
lang: 'en',
|
||||||
country: 'us'
|
country: 'us'
|
||||||
|
@ -45,7 +46,8 @@ async function testSearch() {
|
||||||
async function testCrawlUrl() {
|
async function testCrawlUrl() {
|
||||||
console.log('\n🔍 Testing URL crawling...')
|
console.log('\n🔍 Testing URL crawling...')
|
||||||
try {
|
try {
|
||||||
const result = await firecrawl.crawlUrl('https://example.com', {
|
const result = await firecrawl.crawlUrl({
|
||||||
|
url: 'https://example.com',
|
||||||
maxDepth: 2,
|
maxDepth: 2,
|
||||||
limit: 5
|
limit: 5
|
||||||
})
|
})
|
|
@ -1,5 +1,6 @@
|
||||||
// import type * as z from 'zod'
|
// import type * as z from 'zod'
|
||||||
import {
|
import {
|
||||||
|
aiFunction,
|
||||||
AIFunctionsProvider,
|
AIFunctionsProvider,
|
||||||
assert,
|
assert,
|
||||||
getEnv,
|
getEnv,
|
||||||
|
@ -8,20 +9,30 @@ import {
|
||||||
} from '@agentic/core'
|
} from '@agentic/core'
|
||||||
import defaultKy, { type KyInstance } from 'ky'
|
import defaultKy, { type KyInstance } from 'ky'
|
||||||
import pThrottle from 'p-throttle'
|
import pThrottle from 'p-throttle'
|
||||||
import { type z } from 'zod'
|
import { z } from 'zod'
|
||||||
|
|
||||||
/**
|
export namespace firecrawl {
|
||||||
|
export const BASE_URL = 'https://api.firecrawl.dev'
|
||||||
|
|
||||||
|
// Allow up to 50 request per minute by default.
|
||||||
|
export const throttle = pThrottle({
|
||||||
|
limit: 1,
|
||||||
|
interval: 1200,
|
||||||
|
strict: true
|
||||||
|
})
|
||||||
|
|
||||||
|
/**
|
||||||
* Configuration interface for FirecrawlClient.
|
* Configuration interface for FirecrawlClient.
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlClientConfig {
|
export interface ClientConfig {
|
||||||
apiKey?: string
|
apiKey?: string
|
||||||
apiBaseUrl?: string
|
apiBaseUrl?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Metadata for a Firecrawl document.
|
* Metadata for a Firecrawl document.
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlDocumentMetadata {
|
export interface DocumentMetadata {
|
||||||
title?: string
|
title?: string
|
||||||
description?: string
|
description?: string
|
||||||
language?: string
|
language?: string
|
||||||
|
@ -55,15 +66,15 @@ export interface FirecrawlDocumentMetadata {
|
||||||
statusCode?: number
|
statusCode?: number
|
||||||
error?: string
|
error?: string
|
||||||
[key: string]: any
|
[key: string]: any
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Document interface for Firecrawl.
|
* Document interface for Firecrawl.
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlDocument<
|
export interface Document<
|
||||||
T = any,
|
T = any,
|
||||||
ActionsSchema extends ActionsResult | never = never
|
ActionsSchema extends ActionsResult | never = never
|
||||||
> {
|
> {
|
||||||
url?: string
|
url?: string
|
||||||
markdown?: string
|
markdown?: string
|
||||||
html?: string
|
html?: string
|
||||||
|
@ -72,17 +83,17 @@ export interface FirecrawlDocument<
|
||||||
extract?: T
|
extract?: T
|
||||||
json?: T
|
json?: T
|
||||||
screenshot?: string
|
screenshot?: string
|
||||||
metadata?: FirecrawlDocumentMetadata
|
metadata?: DocumentMetadata
|
||||||
actions: ActionsSchema
|
actions: ActionsSchema
|
||||||
title?: string
|
title?: string
|
||||||
description?: string
|
description?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parameters for scraping operations.
|
* Parameters for scraping operations.
|
||||||
* Defines the options and configurations available for scraping web content.
|
* Defines the options and configurations available for scraping web content.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeOptions {
|
export interface ScrapeOptions {
|
||||||
formats?: (
|
formats?: (
|
||||||
| 'markdown'
|
| 'markdown'
|
||||||
| 'html'
|
| 'html'
|
||||||
|
@ -109,15 +120,15 @@ export interface ScrapeOptions {
|
||||||
removeBase64Images?: boolean
|
removeBase64Images?: boolean
|
||||||
blockAds?: boolean
|
blockAds?: boolean
|
||||||
proxy?: 'basic' | 'stealth'
|
proxy?: 'basic' | 'stealth'
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parameters for scraping operations.
|
* Parameters for scraping operations.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeParams<
|
export interface ScrapeParams<
|
||||||
LLMSchema extends z.ZodSchema = any,
|
LLMSchema extends z.ZodSchema = any,
|
||||||
ActionsSchema extends Action[] | undefined = undefined
|
ActionsSchema extends Action[] | undefined = undefined
|
||||||
> {
|
> {
|
||||||
formats?: (
|
formats?: (
|
||||||
| 'markdown'
|
| 'markdown'
|
||||||
| 'html'
|
| 'html'
|
||||||
|
@ -155,9 +166,9 @@ export interface ScrapeParams<
|
||||||
systemPrompt?: string
|
systemPrompt?: string
|
||||||
}
|
}
|
||||||
actions?: ActionsSchema
|
actions?: ActionsSchema
|
||||||
}
|
}
|
||||||
|
|
||||||
export type Action =
|
export type Action =
|
||||||
| {
|
| {
|
||||||
type: 'wait'
|
type: 'wait'
|
||||||
milliseconds?: number
|
milliseconds?: number
|
||||||
|
@ -192,26 +203,26 @@ export type Action =
|
||||||
script: string
|
script: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ActionsResult {
|
export interface ActionsResult {
|
||||||
screenshots: string[]
|
screenshots: string[]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for scraping operations.
|
* Response interface for scraping operations.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeResponse<
|
export interface ScrapeResponse<
|
||||||
LLMResult = any,
|
LLMResult = any,
|
||||||
ActionsSchema extends ActionsResult | never = never
|
ActionsSchema extends ActionsResult | never = never
|
||||||
> extends FirecrawlDocument<LLMResult, ActionsSchema> {
|
> extends Document<LLMResult, ActionsSchema> {
|
||||||
success: true
|
success: true
|
||||||
warning?: string
|
warning?: string
|
||||||
error?: string
|
error?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parameters for search operations.
|
* Parameters for search operations.
|
||||||
*/
|
*/
|
||||||
export interface SearchParams {
|
export interface SearchParams {
|
||||||
limit?: number
|
limit?: number
|
||||||
tbs?: string
|
tbs?: string
|
||||||
filter?: string
|
filter?: string
|
||||||
|
@ -221,22 +232,22 @@ export interface SearchParams {
|
||||||
origin?: string
|
origin?: string
|
||||||
timeout?: number
|
timeout?: number
|
||||||
scrapeOptions?: ScrapeParams
|
scrapeOptions?: ScrapeParams
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for search operations.
|
* Response interface for search operations.
|
||||||
*/
|
*/
|
||||||
export interface SearchResponse {
|
export interface SearchResponse {
|
||||||
success: boolean
|
success: boolean
|
||||||
data: FirecrawlDocument<undefined>[]
|
data: Document[]
|
||||||
warning?: string
|
warning?: string
|
||||||
error?: string
|
error?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parameters for crawling operations.
|
* Parameters for crawling operations.
|
||||||
*/
|
*/
|
||||||
export interface CrawlParams {
|
export interface CrawlParams {
|
||||||
includePaths?: string[]
|
includePaths?: string[]
|
||||||
excludePaths?: string[]
|
excludePaths?: string[]
|
||||||
maxDepth?: number
|
maxDepth?: number
|
||||||
|
@ -257,22 +268,22 @@ export interface CrawlParams {
|
||||||
deduplicateSimilarURLs?: boolean
|
deduplicateSimilarURLs?: boolean
|
||||||
ignoreQueryParameters?: boolean
|
ignoreQueryParameters?: boolean
|
||||||
regexOnFullURL?: boolean
|
regexOnFullURL?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for crawling operations.
|
* Response interface for crawling operations.
|
||||||
*/
|
*/
|
||||||
export interface CrawlResponse {
|
export interface CrawlResponse {
|
||||||
id?: string
|
id?: string
|
||||||
url?: string
|
url?: string
|
||||||
success: true
|
success: true
|
||||||
error?: string
|
error?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for job status checks.
|
* Response interface for job status checks.
|
||||||
*/
|
*/
|
||||||
export interface CrawlStatusResponse {
|
export interface CrawlStatusResponse {
|
||||||
success: true
|
success: true
|
||||||
status: 'scraping' | 'completed' | 'failed' | 'cancelled'
|
status: 'scraping' | 'completed' | 'failed' | 'cancelled'
|
||||||
completed: number
|
completed: number
|
||||||
|
@ -280,13 +291,13 @@ export interface CrawlStatusResponse {
|
||||||
creditsUsed: number
|
creditsUsed: number
|
||||||
expiresAt: Date
|
expiresAt: Date
|
||||||
next?: string
|
next?: string
|
||||||
data: FirecrawlDocument<undefined>[]
|
data: Document[]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for crawl errors.
|
* Response interface for crawl errors.
|
||||||
*/
|
*/
|
||||||
export interface CrawlErrorsResponse {
|
export interface CrawlErrorsResponse {
|
||||||
errors: {
|
errors: {
|
||||||
id: string
|
id: string
|
||||||
timestamp?: string
|
timestamp?: string
|
||||||
|
@ -294,20 +305,20 @@ export interface CrawlErrorsResponse {
|
||||||
error: string
|
error: string
|
||||||
}[]
|
}[]
|
||||||
robotsBlocked: string[]
|
robotsBlocked: string[]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Error response interface.
|
* Error response interface.
|
||||||
*/
|
*/
|
||||||
export interface ErrorResponse {
|
export interface ErrorResponse {
|
||||||
success: false
|
success: false
|
||||||
error: string
|
error: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Custom error class for Firecrawl.
|
* Custom error class for Firecrawl.
|
||||||
*/
|
*/
|
||||||
export class FirecrawlError extends Error {
|
export class FirecrawlError extends Error {
|
||||||
statusCode: number
|
statusCode: number
|
||||||
details?: any
|
details?: any
|
||||||
|
|
||||||
|
@ -316,12 +327,12 @@ export class FirecrawlError extends Error {
|
||||||
this.statusCode = statusCode
|
this.statusCode = statusCode
|
||||||
this.details = details
|
this.details = details
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parameters for extracting information from URLs.
|
* Parameters for extracting information from URLs.
|
||||||
*/
|
*/
|
||||||
export interface ExtractParams<T extends z.ZodSchema = any> {
|
export interface ExtractParams<T extends z.ZodSchema = any> {
|
||||||
prompt: string
|
prompt: string
|
||||||
schema?: T
|
schema?: T
|
||||||
enableWebSearch?: boolean
|
enableWebSearch?: boolean
|
||||||
|
@ -329,35 +340,36 @@ export interface ExtractParams<T extends z.ZodSchema = any> {
|
||||||
includeSubdomains?: boolean
|
includeSubdomains?: boolean
|
||||||
showSources?: boolean
|
showSources?: boolean
|
||||||
scrapeOptions?: ScrapeOptions
|
scrapeOptions?: ScrapeOptions
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for extracting information from URLs.
|
* Response interface for extracting information from URLs.
|
||||||
* Defines the structure of the response received after extracting information from URLs.
|
* Defines the structure of the response received after extracting information from URLs.
|
||||||
*/
|
*/
|
||||||
export interface ExtractResponse<T = any> {
|
export interface ExtractResponse<T = any> {
|
||||||
success: boolean
|
success: boolean
|
||||||
id?: string
|
id?: string
|
||||||
data: T
|
data: T
|
||||||
error?: string
|
error?: string
|
||||||
warning?: string
|
warning?: string
|
||||||
sources?: string[]
|
sources?: string[]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for extract status operations.
|
* Response interface for extract status operations.
|
||||||
*/
|
*/
|
||||||
export interface ExtractStatusResponse<T = any> {
|
export interface ExtractStatusResponse<T = any> {
|
||||||
success: boolean
|
success: boolean
|
||||||
status: 'processing' | 'completed' | 'failed'
|
status: 'processing' | 'completed' | 'failed'
|
||||||
data?: T
|
data?: T
|
||||||
error?: string
|
error?: string
|
||||||
expiresAt?: string
|
expiresAt?: string
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
|
/**
|
||||||
* Parameters for LLMs.txt generation operations.
|
* Parameters for LLMs.txt generation operations.
|
||||||
*/
|
*/
|
||||||
export interface GenerateLLMsTextParams {
|
export interface GenerateLLMsTextParams {
|
||||||
/**
|
/**
|
||||||
* Maximum number of URLs to process (1-100)
|
* Maximum number of URLs to process (1-100)
|
||||||
* @default 10
|
* @default 10
|
||||||
|
@ -368,20 +380,20 @@ export interface GenerateLLMsTextParams {
|
||||||
* @default false
|
* @default false
|
||||||
*/
|
*/
|
||||||
showFullText?: boolean
|
showFullText?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for LLMs.txt generation operations.
|
* Response interface for LLMs.txt generation operations.
|
||||||
*/
|
*/
|
||||||
export interface GenerateLLMsTextResponse {
|
export interface GenerateLLMsTextResponse {
|
||||||
success: boolean
|
success: boolean
|
||||||
id: string
|
id: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Status response interface for LLMs.txt generation operations.
|
* Status response interface for LLMs.txt generation operations.
|
||||||
*/
|
*/
|
||||||
export interface GenerateLLMsTextStatusResponse {
|
export interface GenerateLLMsTextStatusResponse {
|
||||||
success: boolean
|
success: boolean
|
||||||
data: {
|
data: {
|
||||||
llmstxt: string
|
llmstxt: string
|
||||||
|
@ -390,17 +402,7 @@ export interface GenerateLLMsTextStatusResponse {
|
||||||
status: 'processing' | 'completed' | 'failed'
|
status: 'processing' | 'completed' | 'failed'
|
||||||
error?: string
|
error?: string
|
||||||
expiresAt: string
|
expiresAt: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export namespace firecrawl {
|
|
||||||
export const BASE_URL = 'https://api.firecrawl.dev'
|
|
||||||
|
|
||||||
// Allow up to 50 request per minute by default.
|
|
||||||
export const throttle = pThrottle({
|
|
||||||
limit: 1,
|
|
||||||
interval: 1200,
|
|
||||||
strict: true
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -457,7 +459,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
/**
|
/**
|
||||||
* Sends a POST request.
|
* Sends a POST request.
|
||||||
*/
|
*/
|
||||||
private async postRequest(path: string, data: any): Promise<any> {
|
protected async postRequest(path: string, data: any): Promise<any> {
|
||||||
try {
|
try {
|
||||||
const response = await this.ky.post(path, { json: data })
|
const response = await this.ky.post(path, { json: data })
|
||||||
return await response.json()
|
return await response.json()
|
||||||
|
@ -465,7 +467,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
if (err instanceof Error) {
|
if (err instanceof Error) {
|
||||||
const response = await (err as any).response?.json()
|
const response = await (err as any).response?.json()
|
||||||
if (response?.error) {
|
if (response?.error) {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Request failed. Error: ${response.error}`,
|
`Request failed. Error: ${response.error}`,
|
||||||
(err as any).response?.status ?? 500,
|
(err as any).response?.status ?? 500,
|
||||||
response?.details
|
response?.details
|
||||||
|
@ -479,7 +481,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
/**
|
/**
|
||||||
* Sends a GET request.
|
* Sends a GET request.
|
||||||
*/
|
*/
|
||||||
private async getRequest(path: string): Promise<any> {
|
protected async getRequest(path: string): Promise<any> {
|
||||||
try {
|
try {
|
||||||
const response = await this.ky.get(path)
|
const response = await this.ky.get(path)
|
||||||
return await response.json()
|
return await response.json()
|
||||||
|
@ -487,7 +489,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
if (err instanceof Error) {
|
if (err instanceof Error) {
|
||||||
const response = await (err as any).response?.json()
|
const response = await (err as any).response?.json()
|
||||||
if (response?.error) {
|
if (response?.error) {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Request failed. Error: ${response.error}`,
|
`Request failed. Error: ${response.error}`,
|
||||||
(err as any).response?.status ?? 500,
|
(err as any).response?.status ?? 500,
|
||||||
response?.details
|
response?.details
|
||||||
|
@ -501,7 +503,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
/**
|
/**
|
||||||
* Sends a DELETE request.
|
* Sends a DELETE request.
|
||||||
*/
|
*/
|
||||||
private async deleteRequest(path: string): Promise<any> {
|
protected async deleteRequest(path: string): Promise<any> {
|
||||||
try {
|
try {
|
||||||
const response = await this.ky.delete(path)
|
const response = await this.ky.delete(path)
|
||||||
return await response.json()
|
return await response.json()
|
||||||
|
@ -509,7 +511,7 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
if (err instanceof Error) {
|
if (err instanceof Error) {
|
||||||
const response = await (err as any).response?.json()
|
const response = await (err as any).response?.json()
|
||||||
if (response?.error) {
|
if (response?.error) {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Request failed. Error: ${response.error}`,
|
`Request failed. Error: ${response.error}`,
|
||||||
(err as any).response?.status ?? 500,
|
(err as any).response?.status ?? 500,
|
||||||
response?.details
|
response?.details
|
||||||
|
@ -521,21 +523,33 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scrapes a URL using the Firecrawl API.
|
* Scrape the contents of a URL.
|
||||||
*/
|
*/
|
||||||
|
@aiFunction({
|
||||||
|
name: 'firecrawl_scrape_url',
|
||||||
|
description: 'Scrape the contents of a URL.',
|
||||||
|
inputSchema: z.object({
|
||||||
|
url: z.string().url().describe('The URL to scrape.')
|
||||||
|
})
|
||||||
|
})
|
||||||
async scrapeUrl<
|
async scrapeUrl<
|
||||||
T extends z.ZodSchema,
|
T extends z.ZodSchema,
|
||||||
ActionsSchema extends Action[] | undefined = undefined
|
ActionsSchema extends firecrawl.Action[] | undefined = undefined
|
||||||
>(
|
>(
|
||||||
url: string,
|
orlOrOpts:
|
||||||
params?: ScrapeParams<T, ActionsSchema>
|
| string
|
||||||
|
| ({ url: string } & firecrawl.ScrapeParams<T, ActionsSchema>)
|
||||||
): Promise<
|
): Promise<
|
||||||
| ScrapeResponse<
|
| firecrawl.ScrapeResponse<
|
||||||
z.infer<T>,
|
z.infer<T>,
|
||||||
ActionsSchema extends Action[] ? ActionsResult : never
|
ActionsSchema extends firecrawl.Action[]
|
||||||
|
? firecrawl.ActionsResult
|
||||||
|
: never
|
||||||
>
|
>
|
||||||
| ErrorResponse
|
| firecrawl.ErrorResponse
|
||||||
> {
|
> {
|
||||||
|
const { url, ...params } =
|
||||||
|
typeof orlOrOpts === 'string' ? { url: orlOrOpts } : orlOrOpts
|
||||||
let jsonData: any = { url, ...params }
|
let jsonData: any = { url, ...params }
|
||||||
|
|
||||||
if (jsonData?.extract?.schema) {
|
if (jsonData?.extract?.schema) {
|
||||||
|
@ -570,10 +584,10 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
const response = await this.postRequest('v1/scrape', jsonData)
|
const response = await this.postRequest('v1/scrape', jsonData)
|
||||||
return response
|
return response
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof FirecrawlError) {
|
if (err instanceof firecrawl.FirecrawlError) {
|
||||||
throw err
|
throw err
|
||||||
}
|
}
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
err instanceof Error ? err.message : 'Unknown error',
|
err instanceof Error ? err.message : 'Unknown error',
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
|
@ -583,7 +597,19 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
/**
|
/**
|
||||||
* Searches using the Firecrawl API.
|
* Searches using the Firecrawl API.
|
||||||
*/
|
*/
|
||||||
async search(query: string, params?: SearchParams): Promise<SearchResponse> {
|
@aiFunction({
|
||||||
|
name: 'firecrawl_search',
|
||||||
|
description: 'Searches the internet for the given query.',
|
||||||
|
inputSchema: z.object({
|
||||||
|
query: z.string().describe('Search query.')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
async search(
|
||||||
|
queryOrOpts: string | ({ query: string } & firecrawl.SearchParams)
|
||||||
|
): Promise<firecrawl.SearchResponse> {
|
||||||
|
const { query, ...params } =
|
||||||
|
typeof queryOrOpts === 'string' ? { query: queryOrOpts } : queryOrOpts
|
||||||
|
|
||||||
const jsonData = {
|
const jsonData = {
|
||||||
query,
|
query,
|
||||||
limit: params?.limit ?? 5,
|
limit: params?.limit ?? 5,
|
||||||
|
@ -602,23 +628,23 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
if (response.success) {
|
if (response.success) {
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
data: response.data as FirecrawlDocument<any>[],
|
data: response.data as firecrawl.Document[],
|
||||||
warning: response.warning
|
warning: response.warning
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Failed to search. Error: ${response.error}`,
|
`Failed to search. Error: ${response.error}`,
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
if (err.response?.data?.error) {
|
if (err.response?.data?.error) {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
|
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
|
||||||
err.response.status
|
err.response.status
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
throw new FirecrawlError(err.message, 500)
|
throw new firecrawl.FirecrawlError(err.message, 500)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return { success: false, error: 'Internal server error.', data: [] }
|
return { success: false, error: 'Internal server error.', data: [] }
|
||||||
|
@ -627,10 +653,18 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL.
|
* Initiates a crawl job for a URL.
|
||||||
*/
|
*/
|
||||||
|
@aiFunction({
|
||||||
|
name: 'firecrawl_crawl_url',
|
||||||
|
description: 'Initiates a crawl job for a URL.',
|
||||||
|
inputSchema: z.object({
|
||||||
|
url: z.string().url().describe('The URL to crawl.')
|
||||||
|
})
|
||||||
|
})
|
||||||
async crawlUrl(
|
async crawlUrl(
|
||||||
url: string,
|
urlOrOpts: string | ({ url: string } & firecrawl.CrawlParams)
|
||||||
params?: CrawlParams
|
): Promise<firecrawl.CrawlResponse | firecrawl.ErrorResponse> {
|
||||||
): Promise<CrawlResponse | ErrorResponse> {
|
const { url, ...params } =
|
||||||
|
typeof urlOrOpts === 'string' ? { url: urlOrOpts } : urlOrOpts
|
||||||
const jsonData = { url, ...params }
|
const jsonData = { url, ...params }
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -638,19 +672,19 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
if (response.success) {
|
if (response.success) {
|
||||||
return response
|
return response
|
||||||
} else {
|
} else {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Failed to start crawl job. Error: ${response.error}`,
|
`Failed to start crawl job. Error: ${response.error}`,
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
if (err.response?.data?.error) {
|
if (err.response?.data?.error) {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
|
`Request failed with status code ${err.response.status}. Error: ${err.response.data.error} ${err.response.data.details ? ` - ${JSON.stringify(err.response.data.details)}` : ''}`,
|
||||||
err.response.status
|
err.response.status
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
throw new FirecrawlError(err.message, 500)
|
throw new firecrawl.FirecrawlError(err.message, 500)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return { success: false, error: 'Internal server error.' }
|
return { success: false, error: 'Internal server error.' }
|
||||||
|
@ -661,9 +695,9 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
*/
|
*/
|
||||||
async checkCrawlStatus(
|
async checkCrawlStatus(
|
||||||
id: string
|
id: string
|
||||||
): Promise<CrawlStatusResponse | ErrorResponse> {
|
): Promise<firecrawl.CrawlStatusResponse | firecrawl.ErrorResponse> {
|
||||||
if (!id) {
|
if (!id) {
|
||||||
throw new FirecrawlError('No crawl ID provided', 400)
|
throw new firecrawl.FirecrawlError('No crawl ID provided', 400)
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -671,13 +705,13 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
if (response.success) {
|
if (response.success) {
|
||||||
return response
|
return response
|
||||||
} else {
|
} else {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Failed to check crawl status. Error: ${response.error}`,
|
`Failed to check crawl status. Error: ${response.error}`,
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
throw new FirecrawlError(err.message, 500)
|
throw new firecrawl.FirecrawlError(err.message, 500)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -686,51 +720,52 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
*/
|
*/
|
||||||
async checkCrawlErrors(
|
async checkCrawlErrors(
|
||||||
id: string
|
id: string
|
||||||
): Promise<CrawlErrorsResponse | ErrorResponse> {
|
): Promise<firecrawl.CrawlErrorsResponse | firecrawl.ErrorResponse> {
|
||||||
try {
|
try {
|
||||||
const response = await this.getRequest(`v1/crawl/${id}/errors`)
|
const response = await this.getRequest(`v1/crawl/${id}/errors`)
|
||||||
if (response.errors) {
|
if (response.errors) {
|
||||||
return response
|
return response
|
||||||
} else {
|
} else {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Failed to check crawl errors. Error: ${response.error}`,
|
`Failed to check crawl errors. Error: ${response.error}`,
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
throw new FirecrawlError(err.message, 500)
|
throw new firecrawl.FirecrawlError(err.message, 500)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cancels a crawl job.
|
* Cancels a crawl job.
|
||||||
*/
|
*/
|
||||||
async cancelCrawl(id: string): Promise<ErrorResponse> {
|
async cancelCrawl(id: string): Promise<firecrawl.ErrorResponse> {
|
||||||
try {
|
try {
|
||||||
const response = await this.deleteRequest(`v1/crawl/${id}`)
|
const response = await this.deleteRequest(`v1/crawl/${id}`)
|
||||||
if (response.status) {
|
if (response.status) {
|
||||||
return response
|
return response
|
||||||
} else {
|
} else {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
`Failed to cancel crawl job. Error: ${response.error}`,
|
`Failed to cancel crawl job. Error: ${response.error}`,
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
throw new FirecrawlError(err.message, 500)
|
throw new firecrawl.FirecrawlError(err.message, 500)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts structured data from URLs using LLMs.
|
* Extracts structured data from URLs using LLMs.
|
||||||
|
*
|
||||||
* @param urls - Array of URLs to extract data from
|
* @param urls - Array of URLs to extract data from
|
||||||
* @param params - Additional parameters for the extract request
|
* @param params - Additional parameters for the extract request
|
||||||
* @returns The response from the extract operation
|
* @returns The response from the extract operation
|
||||||
*/
|
*/
|
||||||
async extract<T extends z.ZodSchema>(
|
async extract<T extends z.ZodSchema>(
|
||||||
urls: string[],
|
urls: string[],
|
||||||
params: ExtractParams<T>
|
params: firecrawl.ExtractParams<T>
|
||||||
): Promise<ExtractResponse<z.infer<T>>> {
|
): Promise<firecrawl.ExtractResponse<z.infer<T>>> {
|
||||||
const jsonData = {
|
const jsonData = {
|
||||||
urls,
|
urls,
|
||||||
...params,
|
...params,
|
||||||
|
@ -740,17 +775,17 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
try {
|
try {
|
||||||
const response = await this.postRequest('v1/extract', jsonData)
|
const response = await this.postRequest('v1/extract', jsonData)
|
||||||
if (!response.success) {
|
if (!response.success) {
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
response.error || 'Extract operation failed',
|
response.error || 'Extract operation failed',
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
return response
|
return response
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof FirecrawlError) {
|
if (err instanceof firecrawl.FirecrawlError) {
|
||||||
throw err
|
throw err
|
||||||
}
|
}
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
err instanceof Error ? err.message : 'Unknown error',
|
err instanceof Error ? err.message : 'Unknown error',
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
|
@ -762,19 +797,19 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
*/
|
*/
|
||||||
async checkExtractStatus<T = any>(
|
async checkExtractStatus<T = any>(
|
||||||
id: string
|
id: string
|
||||||
): Promise<ExtractStatusResponse<T>> {
|
): Promise<firecrawl.ExtractStatusResponse<T>> {
|
||||||
if (!id) {
|
if (!id) {
|
||||||
throw new FirecrawlError('No extract ID provided', 400)
|
throw new firecrawl.FirecrawlError('No extract ID provided', 400)
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await this.getRequest(`v1/extract/${id}`)
|
const response = await this.getRequest(`v1/extract/${id}`)
|
||||||
return response
|
return response
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof FirecrawlError) {
|
if (err instanceof firecrawl.FirecrawlError) {
|
||||||
throw err
|
throw err
|
||||||
}
|
}
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
err instanceof Error ? err.message : 'Unknown error',
|
err instanceof Error ? err.message : 'Unknown error',
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
|
@ -786,8 +821,10 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
*/
|
*/
|
||||||
async generateLLMsText(
|
async generateLLMsText(
|
||||||
url: string,
|
url: string,
|
||||||
params: GenerateLLMsTextParams
|
params?: firecrawl.GenerateLLMsTextParams
|
||||||
): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
|
): Promise<
|
||||||
|
firecrawl.GenerateLLMsTextStatusResponse | firecrawl.ErrorResponse
|
||||||
|
> {
|
||||||
const jsonData = {
|
const jsonData = {
|
||||||
url,
|
url,
|
||||||
...params
|
...params
|
||||||
|
@ -797,10 +834,10 @@ export class FirecrawlClient extends AIFunctionsProvider {
|
||||||
const response = await this.postRequest('v1/llmstxt', jsonData)
|
const response = await this.postRequest('v1/llmstxt', jsonData)
|
||||||
return response
|
return response
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof FirecrawlError) {
|
if (err instanceof firecrawl.FirecrawlError) {
|
||||||
throw err
|
throw err
|
||||||
}
|
}
|
||||||
throw new FirecrawlError(
|
throw new firecrawl.FirecrawlError(
|
||||||
err instanceof Error ? err.message : 'Unknown error',
|
err instanceof Error ? err.message : 'Unknown error',
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,13 +1 @@
|
||||||
export type {
|
export * from './firecrawl-client'
|
||||||
ErrorResponse,
|
|
||||||
ExtractParams,
|
|
||||||
ExtractResponse,
|
|
||||||
ExtractStatusResponse,
|
|
||||||
FirecrawlClientConfig,
|
|
||||||
GenerateLLMsTextParams,
|
|
||||||
GenerateLLMsTextResponse,
|
|
||||||
GenerateLLMsTextStatusResponse,
|
|
||||||
ScrapeParams,
|
|
||||||
ScrapeResponse
|
|
||||||
} from './firecrawl-client.js'
|
|
||||||
export { FirecrawlClient } from './firecrawl-client.js'
|
|
||||||
|
|
Ładowanie…
Reference in New Issue