/* eslint-disable unicorn/no-unreadable-iife */
/* eslint-disable unicorn/no-array-reduce */

/**
 * This file was auto-generated from an OpenAPI spec.
 */

import {
  aiFunction,
  AIFunctionsProvider,
  assert,
  getEnv,
  pick
} from '@agentic/core'
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'

export namespace firecrawl {
  export const apiBaseUrl = 'https://api.firecrawl.dev/v0'

  // -----------------------------------------------------------------------------
  // Component schemas
  // -----------------------------------------------------------------------------

  export const ScrapeResponseSchema = z.object({
    success: z.boolean().optional(),
    /** Warning message to let you know of any issues. */
    warning: z
      .string()
      .describe('Warning message to let you know of any issues.')
      .optional(),
    data: z
      .object({
        /** Markdown content of the page if the `markdown` format was specified (default) */
        markdown: z
          .string()
          .describe(
            'Markdown content of the page if the `markdown` format was specified (default)'
          )
          .optional(),
        /** HTML version of the content on page if the `html` format was specified */
        html: z
          .string()
          .describe(
            'HTML version of the content on page if the `html` format was specified'
          )
          .optional(),
        /** Raw HTML content of the page if the `rawHtml` format was specified */
        rawHtml: z
          .string()
          .describe(
            'Raw HTML content of the page if the `rawHtml` format was specified'
          )
          .optional(),
        /** Links on the page if the `links` format was specified */
        links: z
          .array(z.string().url())
          .describe('Links on the page if the `links` format was specified')
          .optional(),
        /** URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified */
        screenshot: z
          .string()
          .describe(
            'URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified'
          )
          .optional(),
        metadata: z
          .object({
            title: z.string().optional(),
            description: z.string().optional(),
            language: z.string().optional(),
            sourceURL: z.string().url().optional(),
            '<any other metadata> ': z.string().optional(),
            /** The status code of the page */
            statusCode: z
              .number()
              .int()
              .describe('The status code of the page')
              .optional(),
            /** The error message of the page */
            error: z
              .string()
              .describe('The error message of the page')
              .optional()
          })
          .optional()
      })
      .optional()
  })
  export type ScrapeResponse = z.infer<typeof ScrapeResponseSchema>

  export const CrawlResponseSchema = z.object({
    success: z.boolean().optional(),
    id: z.string().optional(),
    url: z.string().url().optional()
  })
  export type CrawlResponse = z.infer<typeof CrawlResponseSchema>

  export const SearchResponseSchema = z.object({
    success: z.boolean().optional(),
    data: z.array(z.any()).optional()
  })
  export type SearchResponse = z.infer<typeof SearchResponseSchema>
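
  // Illustrative sketch (not part of the generated client): the component
  // schemas above can be used on their own to validate raw API payloads.
  // The payload below is hypothetical.
  //
  //   const candidate: unknown = {
  //     success: true,
  //     data: { markdown: '# Example', metadata: { statusCode: 200 } }
  //   }
  //   const parsed = ScrapeResponseSchema.safeParse(candidate)
  //   if (parsed.success) console.log(parsed.data.data?.markdown)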

  export const CrawlStatusResponseObjSchema = z.object({
    /** Markdown content of the page if the `markdown` format was specified (default) */
    markdown: z
      .string()
      .describe(
        'Markdown content of the page if the `markdown` format was specified (default)'
      )
      .optional(),
    /** HTML version of the content on page if the `html` format was specified */
    html: z
      .string()
      .describe(
        'HTML version of the content on page if the `html` format was specified'
      )
      .optional(),
    /** Raw HTML content of the page if the `rawHtml` format was specified */
    rawHtml: z
      .string()
      .describe(
        'Raw HTML content of the page if the `rawHtml` format was specified'
      )
      .optional(),
    /** Links on the page if the `links` format was specified */
    links: z
      .array(z.string().url())
      .describe('Links on the page if the `links` format was specified')
      .optional(),
    /** URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified */
    screenshot: z
      .string()
      .describe(
        'URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified'
      )
      .optional(),
    metadata: z
      .object({
        title: z.string().optional(),
        description: z.string().optional(),
        language: z.string().optional(),
        sourceURL: z.string().url().optional(),
        '<any other metadata> ': z.string().optional(),
        /** The status code of the page */
        statusCode: z
          .number()
          .int()
          .describe('The status code of the page')
          .optional(),
        /** The error message of the page */
        error: z.string().describe('The error message of the page').optional()
      })
      .optional()
  })
  export type CrawlStatusResponseObj = z.infer<
    typeof CrawlStatusResponseObjSchema
  >

  // -----------------------------------------------------------------------------
  // Operation schemas
  // -----------------------------------------------------------------------------

  export const ScrapeParamsSchema = z.object({
    /** The URL to scrape */
    url: z.string().url().describe('The URL to scrape'),
    /**
     * Specific formats to return.
     *
     *  - markdown: The page in Markdown format.
     *  - html: The page's HTML, trimmed to include only meaningful content.
     *  - rawHtml: The page's original HTML.
     *  - links: The links on the page.
     *  - screenshot: A screenshot of the top of the page.
     *  - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)
     */
    formats: z
      .array(
        z.enum([
          'markdown',
          'html',
          'rawHtml',
          'links',
          'screenshot',
          'screenshot@fullPage'
        ])
      )
      .describe(
        "Specific formats to return.\n\n - markdown: The page in Markdown format.\n - html: The page's HTML, trimmed to include only meaningful content.\n - rawHtml: The page's original HTML.\n - links: The links on the page.\n - screenshot: A screenshot of the top of the page.\n - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)"
      )
      .default(['markdown']),
    /** Headers to send with the request. Can be used to send cookies, user-agent, etc. */
    headers: z
      .record(z.any())
      .describe(
        'Headers to send with the request. Can be used to send cookies, user-agent, etc.'
      )
      .optional(),
    /** Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer' */
    includeTags: z
      .array(z.string())
      .describe(
        "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
      )
      .optional(),
    /** Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer' */
    excludeTags: z
      .array(z.string())
      .describe(
        "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
      )
      .optional(),
    /** Only return the main content of the page excluding headers, navs, footers, etc. */
    onlyMainContent: z
      .boolean()
      .describe(
        'Only return the main content of the page excluding headers, navs, footers, etc.'
      )
      .default(true),
    /** Timeout in milliseconds for the request */
    timeout: z
      .number()
      .int()
      .describe('Timeout in milliseconds for the request')
      .default(30_000),
    /** Wait x amount of milliseconds for the page to load to fetch content */
    waitFor: z
      .number()
      .int()
      .describe(
        'Wait x amount of milliseconds for the page to load to fetch content'
      )
      .default(0)
  })
  export type ScrapeParams = z.infer<typeof ScrapeParamsSchema>
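
  // Illustrative sketch (not part of the generated client): parsing fills in
  // the declared defaults, so a minimal input gains `formats`, `timeout`, etc.
  // The URL below is hypothetical.
  //
  //   const withDefaults = ScrapeParamsSchema.parse({
  //     url: 'https://example.com'
  //   })
  //   // => { url: '...', formats: ['markdown'], onlyMainContent: true,
  //   //      timeout: 30_000, waitFor: 0 }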

  export const CrawlUrlsParamsSchema = z.object({
    /** The base URL to start crawling from */
    url: z.string().url().describe('The base URL to start crawling from'),
    crawlerOptions: z
      .object({
        /** URL patterns to include */
        includes: z
          .array(z.string())
          .describe('URL patterns to include')
          .optional(),
        /** URL patterns to exclude */
        excludes: z
          .array(z.string())
          .describe('URL patterns to exclude')
          .optional(),
        /** Generate alt text for images using LLMs (must have a paid plan) */
        generateImgAltText: z
          .boolean()
          .describe(
            'Generate alt text for images using LLMs (must have a paid plan)'
          )
          .default(false),
        /** If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents. */
        returnOnlyUrls: z
          .boolean()
          .describe(
            'If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.'
          )
          .default(false),
        /** Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern. */
        maxDepth: z
          .number()
          .int()
          .describe(
            'Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern.'
          )
          .optional(),
        /** The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may not be as accurate and shouldn't be used on heavily js-rendered websites. */
        mode: z
          .enum(['default', 'fast'])
          .describe(
            "The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may not be as accurate and shouldn't be used on heavily js-rendered websites."
          )
          .default('default'),
        /** Ignore the website sitemap when crawling */
        ignoreSitemap: z
          .boolean()
          .describe('Ignore the website sitemap when crawling')
          .default(false),
        /** Maximum number of pages to crawl */
        limit: z
          .number()
          .int()
          .describe('Maximum number of pages to crawl')
          .default(10_000),
        /** Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product' */
        allowBackwardCrawling: z
          .boolean()
          .describe(
            "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'"
          )
          .default(false),
        /** Allows the crawler to follow links to external websites. */
        allowExternalContentLinks: z
          .boolean()
          .describe('Allows the crawler to follow links to external websites.')
          .default(false)
      })
      .optional(),
    pageOptions: z
      .object({
        /** Headers to send with the request. Can be used to send cookies, user-agent, etc. */
        headers: z
          .record(z.any())
          .describe(
            'Headers to send with the request. Can be used to send cookies, user-agent, etc.'
          )
          .optional(),
        /** Include the HTML version of the content on page. Will output an html key in the response. */
        includeHtml: z
          .boolean()
          .describe(
            'Include the HTML version of the content on page. Will output an html key in the response.'
          )
          .default(false),
        /** Include the raw HTML content of the page. Will output a rawHtml key in the response. */
        includeRawHtml: z
          .boolean()
          .describe(
            'Include the raw HTML content of the page. Will output a rawHtml key in the response.'
          )
          .default(false),
        /** Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer' */
        onlyIncludeTags: z
          .array(z.string())
          .describe(
            "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
          )
          .optional(),
        /** Only return the main content of the page excluding headers, navs, footers, etc. */
        onlyMainContent: z
          .boolean()
          .describe(
            'Only return the main content of the page excluding headers, navs, footers, etc.'
          )
          .default(false),
        /** Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer' */
        removeTags: z
          .array(z.string())
          .describe(
            "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
          )
          .optional(),
        /** Replace all relative paths with absolute paths for images and links */
        replaceAllPathsWithAbsolutePaths: z
          .boolean()
          .describe(
            'Replace all relative paths with absolute paths for images and links'
          )
          .default(false),
        /** Include a screenshot of the top of the page that you are scraping. */
        screenshot: z
          .boolean()
          .describe(
            'Include a screenshot of the top of the page that you are scraping.'
          )
          .default(false),
        /** Include a full page screenshot of the page that you are scraping. */
        fullPageScreenshot: z
          .boolean()
          .describe(
            'Include a full page screenshot of the page that you are scraping.'
          )
          .default(false),
        /** Wait x amount of milliseconds for the page to load to fetch content */
        waitFor: z
          .number()
          .int()
          .describe(
            'Wait x amount of milliseconds for the page to load to fetch content'
          )
          .default(0)
      })
      .optional()
  })
  export type CrawlUrlsParams = z.infer<typeof CrawlUrlsParamsSchema>

  export const CrawlUrlsResponseSchema = CrawlResponseSchema
  export type CrawlUrlsResponse = z.infer<typeof CrawlUrlsResponseSchema>
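
  // Illustrative sketch (not part of the generated client): a typical crawl
  // request scoping the crawler to one section of a hypothetical site; the
  // nested defaults (mode, limit, etc.) are filled in by the parse.
  //
  //   const crawlParams: CrawlUrlsParams = CrawlUrlsParamsSchema.parse({
  //     url: 'https://example.com/blog',
  //     crawlerOptions: { maxDepth: 2, limit: 100, includes: ['blog/*'] }
  //   })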

  export const SearchGoogleParamsSchema = z.object({
    /** The query to search for */
    query: z.string().describe('The query to search for'),
    pageOptions: z
      .object({
        /** Only return the main content of the page excluding headers, navs, footers, etc. */
        onlyMainContent: z
          .boolean()
          .describe(
            'Only return the main content of the page excluding headers, navs, footers, etc.'
          )
          .default(false),
        /** Fetch the content of each page. If false, defaults to a basic fast serp API. */
        fetchPageContent: z
          .boolean()
          .describe(
            'Fetch the content of each page. If false, defaults to a basic fast serp API.'
          )
          .default(true),
        /** Include the HTML version of the content on page. Will output an html key in the response. */
        includeHtml: z
          .boolean()
          .describe(
            'Include the HTML version of the content on page. Will output an html key in the response.'
          )
          .default(false),
        /** Include the raw HTML content of the page. Will output a rawHtml key in the response. */
        includeRawHtml: z
          .boolean()
          .describe(
            'Include the raw HTML content of the page. Will output a rawHtml key in the response.'
          )
          .default(false)
      })
      .optional(),
    searchOptions: z
      .object({
        /** Maximum number of results. Max is 20 during beta. */
        limit: z
          .number()
          .int()
          .describe('Maximum number of results. Max is 20 during beta.')
          .optional()
      })
      .optional()
  })
  export type SearchGoogleParams = z.infer<typeof SearchGoogleParamsSchema>

  export const SearchGoogleResponseSchema = SearchResponseSchema
  export type SearchGoogleResponse = z.infer<typeof SearchGoogleResponseSchema>
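
  // Illustrative sketch (not part of the generated client): a minimal search
  // request with a hypothetical query. Note that `limit` is capped at 20
  // while search is in beta.
  //
  //   const searchParams = SearchGoogleParamsSchema.parse({
  //     query: 'open source web crawlers',
  //     searchOptions: { limit: 5 }
  //   })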

  export const GetCrawlStatusParamsSchema = z.object({
    /** ID of the crawl job */
    jobId: z.string().describe('ID of the crawl job')
  })
  export type GetCrawlStatusParams = z.infer<typeof GetCrawlStatusParamsSchema>

  export const GetCrawlStatusResponseSchema = z.object({
    /** Status of the job (completed, active, failed, paused) */
    status: z
      .string()
      .describe('Status of the job (completed, active, failed, paused)')
      .optional(),
    /** Current page number */
    current: z.number().int().describe('Current page number').optional(),
    /** Total number of pages */
    total: z.number().int().describe('Total number of pages').optional(),
    /** Data returned from the job (null when it is in progress) */
    data: z
      .array(CrawlStatusResponseObjSchema)
      .describe('Data returned from the job (null when it is in progress)')
      .optional(),
    /** Partial documents returned as the site is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data becomes empty and the result is available in `data`. The array holds at most 50 items; the oldest item (top of the array) is removed when a new item is added. */
    partial_data: z
      .array(CrawlStatusResponseObjSchema)
      .describe(
        'Partial documents returned as the site is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data becomes empty and the result is available in `data`. The array holds at most 50 items; the oldest item (top of the array) is removed when a new item is added.'
      )
      .optional()
  })
  export type GetCrawlStatusResponse = z.infer<
    typeof GetCrawlStatusResponseSchema
  >

  export const CancelCrawlJobParamsSchema = z.object({
    /** ID of the crawl job */
    jobId: z.string().describe('ID of the crawl job')
  })
  export type CancelCrawlJobParams = z.infer<typeof CancelCrawlJobParamsSchema>

  export const CancelCrawlJobResponseSchema = z.object({
    /** Returns cancelled. */
    status: z.string().describe('Returns cancelled.').optional()
  })
  export type CancelCrawlJobResponse = z.infer<
    typeof CancelCrawlJobResponseSchema
  >
}

/**
 * Agentic Firecrawl client.
 *
 * API for interacting with Firecrawl services to perform web scraping and
 * crawling tasks.
 */
export class FirecrawlClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance

  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  constructor({
    apiKey = getEnv('FIRECRAWL_API_KEY'),
    apiBaseUrl = firecrawl.apiBaseUrl,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'FirecrawlClient missing required "apiKey" (defaults to "FIRECRAWL_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    this.ky = ky.extend({
      prefixUrl: apiBaseUrl,
      headers: {
        Authorization: `Bearer ${apiKey}`
      }
    })
  }

  /**
   * Scrape a single URL.
   */
  @aiFunction({
    name: 'scrape',
    description: `Scrape a single URL.`,
    inputSchema: firecrawl.ScrapeParamsSchema
  })
  async scrape(
    params: firecrawl.ScrapeParams
  ): Promise<firecrawl.ScrapeResponse> {
    return this.ky
      .post('scrape', {
        json: pick(
          params,
          'url',
          'formats',
          'headers',
          'includeTags',
          'excludeTags',
          'onlyMainContent',
          'timeout',
          'waitFor'
        )
      })
      .json<firecrawl.ScrapeResponse>()
  }

  /**
   * Crawl multiple URLs based on options.
   */
  @aiFunction({
    name: 'crawl_urls',
    description: `Crawl multiple URLs based on options.`,
    inputSchema: firecrawl.CrawlUrlsParamsSchema
  })
  async crawlUrls(
    params: firecrawl.CrawlUrlsParams
  ): Promise<firecrawl.CrawlUrlsResponse> {
    return this.ky
      .post('crawl', {
        json: pick(params, 'url', 'crawlerOptions', 'pageOptions')
      })
      .json<firecrawl.CrawlUrlsResponse>()
  }

  /**
   * Search for a keyword in Google, returning top page results with markdown content for each page.
   */
  @aiFunction({
    name: 'search_google',
    description: `Search for a keyword in Google, returning top page results with markdown content for each page.`,
    inputSchema: firecrawl.SearchGoogleParamsSchema
  })
  async searchGoogle(
    params: firecrawl.SearchGoogleParams
  ): Promise<firecrawl.SearchGoogleResponse> {
    return this.ky
      .post('search', {
        json: pick(params, 'query', 'pageOptions', 'searchOptions')
      })
      .json<firecrawl.SearchGoogleResponse>()
  }

  /**
   * Get the status of a crawl job.
   */
  @aiFunction({
    name: 'get_crawl_status',
    description: `Get the status of a crawl job.`,
    inputSchema: firecrawl.GetCrawlStatusParamsSchema
  })
  async getCrawlStatus(
    params: firecrawl.GetCrawlStatusParams
  ): Promise<firecrawl.GetCrawlStatusResponse> {
    return this.ky
      .get(`crawl/status/${params.jobId}`)
      .json<firecrawl.GetCrawlStatusResponse>()
  }

  /**
   * Cancel a crawl job.
   */
  @aiFunction({
    name: 'cancel_crawl_job',
    description: `Cancel a crawl job.`,
    inputSchema: firecrawl.CancelCrawlJobParamsSchema
  })
  async cancelCrawlJob(
    params: firecrawl.CancelCrawlJobParams
  ): Promise<firecrawl.CancelCrawlJobResponse> {
    return this.ky
      .delete(`crawl/cancel/${params.jobId}`)
      .json<firecrawl.CancelCrawlJobResponse>()
  }
}
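
// Illustrative end-to-end usage (a sketch, not part of the generated file).
// Assumes `FIRECRAWL_API_KEY` is set in the environment; the URLs and the 2s
// polling interval are hypothetical. Note that fields with schema defaults
// (formats, onlyMainContent, timeout, waitFor) are required on the plain
// TypeScript `ScrapeParams` type, since it is inferred from the parsed output.
//
//   const client = new FirecrawlClient()
//
//   // One-off scrape of a single page.
//   const page = await client.scrape({
//     url: 'https://example.com',
//     formats: ['markdown'],
//     onlyMainContent: true,
//     timeout: 30_000,
//     waitFor: 0
//   })
//   console.log(page.data?.markdown)
//
//   // Kick off a crawl, then poll until the job is no longer active.
//   const job = await client.crawlUrls({ url: 'https://example.com' })
//   if (job.id) {
//     let status = await client.getCrawlStatus({ jobId: job.id })
//     while (status.status === 'active') {
//       await new Promise((resolve) => setTimeout(resolve, 2000))
//       status = await client.getCrawlStatus({ jobId: job.id })
//     }
//     console.log(status.data?.length, 'pages crawled')
//   }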