/**
* This file was auto-generated from an OpenAPI spec.
*/
import { aiFunction, AIFunctionsProvider, assert, getEnv } from '@agentic/core'
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
export namespace firecrawl {
export const apiBaseUrl = 'https://api.firecrawl.dev/v0'
export const ScrapeResponseSchema = z.object({
success: z.boolean().optional(),
/** Warning message to let you know of any issues. */
warning: z
.string()
.nullable()
.describe('Warning message to let you know of any issues.')
.optional(),
data: z
.object({
/** Markdown content of the page if the `markdown` format was specified (default) */
markdown: z
.string()
.nullable()
.describe(
'Markdown content of the page if the `markdown` format was specified (default)'
)
.optional(),
/** HTML version of the content of the page if the `html` format was specified */
html: z
.string()
.nullable()
.describe(
'HTML version of the content of the page if the `html` format was specified'
)
.optional(),
/** Raw HTML content of the page if the `rawHtml` format was specified */
rawHtml: z
.string()
.nullable()
.describe(
'Raw HTML content of the page if the `rawHtml` format was specified'
)
.optional(),
/** Links on the page if the `links` format was specified */
links: z
.array(z.string().url())
.nullable()
.describe('Links on the page if the `links` format was specified')
.optional(),
/** URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified */
screenshot: z
.string()
.nullable()
.describe(
'URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified'
)
.optional(),
metadata: z
.object({
title: z.string().optional(),
description: z.string().optional(),
language: z.string().nullable().optional(),
sourceURL: z.string().url().optional(),
'<any other metadata> ': z.string().optional(),
/** The status code of the page */
statusCode: z
.number()
.int()
.describe('The status code of the page')
.optional(),
/** The error message of the page */
error: z
.string()
.nullable()
.describe('The error message of the page')
.optional()
})
.optional()
})
.optional()
})
export type ScrapeResponse = z.infer<typeof ScrapeResponseSchema>
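// Usage sketch (illustrative; `rawJson` is a placeholder for an untrusted
// payload, not part of the generated client): responses can be validated
// before use, since `safeParse` returns a result object instead of throwing.
//
//   const parsed = ScrapeResponseSchema.safeParse(rawJson)
//   if (parsed.success) {
//     console.log(parsed.data.data?.markdown)
//   }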
export const CrawlResponseSchema = z.object({
success: z.boolean().optional(),
id: z.string().optional(),
url: z.string().url().optional()
})
export type CrawlResponse = z.infer<typeof CrawlResponseSchema>
export const SearchResponseSchema = z.object({
success: z.boolean().optional(),
data: z.array(z.any()).optional()
})
export type SearchResponse = z.infer<typeof SearchResponseSchema>
export const CrawlStatusResponseObjSchema = z.object({
/** Markdown content of the page if the `markdown` format was specified (default) */
markdown: z
.string()
.nullable()
.describe(
'Markdown content of the page if the `markdown` format was specified (default)'
)
.optional(),
/** HTML version of the content of the page if the `html` format was specified */
html: z
.string()
.nullable()
.describe(
'HTML version of the content of the page if the `html` format was specified'
)
.optional(),
/** Raw HTML content of the page if the `rawHtml` format was specified */
rawHtml: z
.string()
.nullable()
.describe(
'Raw HTML content of the page if the `rawHtml` format was specified'
)
.optional(),
/** Links on the page if the `links` format was specified */
links: z
.array(z.string().url())
.nullable()
.describe('Links on the page if the `links` format was specified')
.optional(),
/** URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified */
screenshot: z
.string()
.nullable()
.describe(
'URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified'
)
.optional(),
metadata: z
.object({
title: z.string().optional(),
description: z.string().optional(),
language: z.string().nullable().optional(),
sourceURL: z.string().url().optional(),
'<any other metadata> ': z.string().optional(),
/** The status code of the page */
statusCode: z
.number()
.int()
.describe('The status code of the page')
.optional(),
/** The error message of the page */
error: z
.string()
.nullable()
.describe('The error message of the page')
.optional()
})
.optional()
})
export type CrawlStatusResponseObj = z.infer<
typeof CrawlStatusResponseObjSchema
>
export const ScrapeParamsSchema = z.object({
/** The URL to scrape */
url: z.string().url().describe('The URL to scrape'),
/**
* Specific formats to return.
*
* - markdown: The page in Markdown format.
* - html: The page's HTML, trimmed to include only meaningful content.
* - rawHtml: The page's original HTML.
* - links: The links on the page.
* - screenshot: A screenshot of the top of the page.
* - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)
*/
formats: z
.array(
z.enum([
'markdown',
'html',
'rawHtml',
'links',
'screenshot',
'screenshot@fullPage'
])
)
.describe(
"Specific formats to return.\n\n - markdown: The page in Markdown format.\n - html: The page's HTML, trimmed to include only meaningful content.\n - rawHtml: The page's original HTML.\n - links: The links on the page.\n - screenshot: A screenshot of the top of the page.\n - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)"
)
.default(['markdown']),
/** Headers to send with the request. Can be used to send cookies, user-agent, etc. */
headers: z
.record(z.any())
.describe(
'Headers to send with the request. Can be used to send cookies, user-agent, etc.'
)
.optional(),
/** Only include tags, classes and ids from the page in the final output. Example: ['script', '.ad', '#footer'] */
includeTags: z
.array(z.string())
.describe(
"Only include tags, classes and ids from the page in the final output. Example: ['script', '.ad', '#footer']"
)
.optional(),
/** Tags, classes and ids to remove from the page. Example: ['script', '.ad', '#footer'] */
excludeTags: z
.array(z.string())
.describe(
"Tags, classes and ids to remove from the page. Example: ['script', '.ad', '#footer']"
)
.optional(),
/** Only return the main content of the page excluding headers, navs, footers, etc. */
onlyMainContent: z
.boolean()
.describe(
'Only return the main content of the page excluding headers, navs, footers, etc.'
)
.default(true),
/** Timeout in milliseconds for the request */
timeout: z
.number()
.int()
.describe('Timeout in milliseconds for the request')
.default(30_000),
/** Wait the given number of milliseconds for the page to load before fetching content */
waitFor: z
.number()
.int()
.describe(
'Wait the given number of milliseconds for the page to load before fetching content'
)
.default(0)
})
export type ScrapeParams = z.infer<typeof ScrapeParamsSchema>
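// Sketch of how the defaults above behave (illustrative): `.parse` fills in
// every field that declares a `.default(...)`, so a minimal input expands to
// a fully-populated params object.
//
//   const params = ScrapeParamsSchema.parse({ url: 'https://example.com' })
//   // => { url: 'https://example.com', formats: ['markdown'],
//   //      onlyMainContent: true, timeout: 30000, waitFor: 0 }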
export const CrawlUrlsParamsSchema = z.object({
/** The base URL to start crawling from */
url: z.string().url().describe('The base URL to start crawling from'),
crawlerOptions: z
.object({
/** URL patterns to include */
includes: z
.array(z.string())
.describe('URL patterns to include')
.optional(),
/** URL patterns to exclude */
excludes: z
.array(z.string())
.describe('URL patterns to exclude')
.optional(),
/** Generate alt text for images using LLMs (must have a paid plan) */
generateImgAltText: z
.boolean()
.describe(
'Generate alt text for images using LLMs (must have a paid plan)'
)
.default(false),
/** If true, returns only the URLs as a list on the crawl status. Note: the response will be a list of URLs inside `data`, not a list of documents. */
returnOnlyUrls: z
.boolean()
.describe(
'If true, returns only the URLs as a list on the crawl status. Note: the response will be a list of URLs inside `data`, not a list of documents.'
)
.default(false),
/** Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern. */
maxDepth: z
.number()
.int()
.describe(
'Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern.'
)
.optional(),
/** The crawling mode to use. Fast mode crawls websites without a sitemap up to 4x faster, but may be less accurate and shouldn't be used on heavily JS-rendered websites. */
mode: z
.enum(['default', 'fast'])
.describe(
"The crawling mode to use. Fast mode crawls websites without a sitemap up to 4x faster, but may be less accurate and shouldn't be used on heavily JS-rendered websites."
)
.default('default'),
/** Ignore the website sitemap when crawling */
ignoreSitemap: z
.boolean()
.describe('Ignore the website sitemap when crawling')
.default(false),
/** Maximum number of pages to crawl */
limit: z
.number()
.int()
.describe('Maximum number of pages to crawl')
.default(10_000),
/** Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product' */
allowBackwardCrawling: z
.boolean()
.describe(
"Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'"
)
.default(false),
/** Allows the crawler to follow links to external websites. */
allowExternalContentLinks: z
.boolean()
.describe('Allows the crawler to follow links to external websites.')
.default(false)
})
.optional(),
pageOptions: z
.object({
/** Headers to send with the request. Can be used to send cookies, user-agent, etc. */
headers: z
.record(z.any())
.describe(
'Headers to send with the request. Can be used to send cookies, user-agent, etc.'
)
.optional(),
/** Include the HTML version of the content of the page. Will output an `html` key in the response. */
includeHtml: z
.boolean()
.describe(
'Include the HTML version of the content of the page. Will output an `html` key in the response.'
)
.default(false),
/** Include the raw HTML content of the page. Will output a rawHtml key in the response. */
includeRawHtml: z
.boolean()
.describe(
'Include the raw HTML content of the page. Will output a rawHtml key in the response.'
)
.default(false),
/** Only include tags, classes and ids from the page in the final output. Example: ['script', '.ad', '#footer'] */
onlyIncludeTags: z
.array(z.string())
.describe(
"Only include tags, classes and ids from the page in the final output. Example: ['script', '.ad', '#footer']"
)
.optional(),
/** Only return the main content of the page excluding headers, navs, footers, etc. */
onlyMainContent: z
.boolean()
.describe(
'Only return the main content of the page excluding headers, navs, footers, etc.'
)
.default(false),
/** Tags, classes and ids to remove from the page. Example: ['script', '.ad', '#footer'] */
removeTags: z
.array(z.string())
.describe(
"Tags, classes and ids to remove from the page. Example: ['script', '.ad', '#footer']"
)
.optional(),
/** Replace all relative paths with absolute paths for images and links */
replaceAllPathsWithAbsolutePaths: z
.boolean()
.describe(
'Replace all relative paths with absolute paths for images and links'
)
.default(false),
/** Include a screenshot of the top of the page that you are scraping. */
screenshot: z
.boolean()
.describe(
'Include a screenshot of the top of the page that you are scraping.'
)
.default(false),
/** Include a full page screenshot of the page that you are scraping. */
fullPageScreenshot: z
.boolean()
.describe(
'Include a full page screenshot of the page that you are scraping.'
)
.default(false),
/** Wait the given number of milliseconds for the page to load before fetching content */
waitFor: z
.number()
.int()
.describe(
'Wait the given number of milliseconds for the page to load before fetching content'
)
.default(0)
})
.optional()
})
export type CrawlUrlsParams = z.infer<typeof CrawlUrlsParamsSchema>
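// Sketch of a typical crawl request (illustrative values): only `url` is
// required, and `.parse` fills in the nested crawler defaults (mode, limit,
// etc.) when `crawlerOptions` is present.
//
//   const params = CrawlUrlsParamsSchema.parse({
//     url: 'https://example.com',
//     crawlerOptions: { maxDepth: 2, limit: 100 }
//   })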
export const CrawlUrlsResponseSchema = CrawlResponseSchema
export type CrawlUrlsResponse = z.infer<typeof CrawlUrlsResponseSchema>
export const SearchGoogleParamsSchema = z.object({
/** The query to search for */
query: z.string().describe('The query to search for'),
pageOptions: z
.object({
/** Only return the main content of the page excluding headers, navs, footers, etc. */
onlyMainContent: z
.boolean()
.describe(
'Only return the main content of the page excluding headers, navs, footers, etc.'
)
.default(false),
/** Fetch the content of each page. If false, falls back to a basic, fast SERP API. */
fetchPageContent: z
.boolean()
.describe(
'Fetch the content of each page. If false, falls back to a basic, fast SERP API.'
)
.default(true),
/** Include the HTML version of the content of the page. Will output an `html` key in the response. */
includeHtml: z
.boolean()
.describe(
'Include the HTML version of the content of the page. Will output an `html` key in the response.'
)
.default(false),
/** Include the raw HTML content of the page. Will output a rawHtml key in the response. */
includeRawHtml: z
.boolean()
.describe(
'Include the raw HTML content of the page. Will output a rawHtml key in the response.'
)
.default(false)
})
.optional(),
searchOptions: z
.object({
/** Maximum number of results. Max is 20 during beta. */
limit: z
.number()
.int()
.describe('Maximum number of results. Max is 20 during beta.')
.optional()
})
.optional()
})
export type SearchGoogleParams = z.infer<typeof SearchGoogleParamsSchema>
export const SearchGoogleResponseSchema = SearchResponseSchema
export type SearchGoogleResponse = z.infer<typeof SearchGoogleResponseSchema>
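// Sketch of a search request (illustrative): `fetchPageContent` defaults to
// true, so each result includes scraped page content; set it to false for a
// plain SERP-style response.
//
//   const params = SearchGoogleParamsSchema.parse({
//     query: 'firecrawl web scraping',
//     searchOptions: { limit: 5 }
//   })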
export const GetCrawlStatusParamsSchema = z.object({
/** ID of the crawl job */
jobId: z.string().describe('ID of the crawl job')
})
export type GetCrawlStatusParams = z.infer<typeof GetCrawlStatusParamsSchema>
export const GetCrawlStatusResponseSchema = z.object({
/** Status of the job (completed, active, failed, paused) */
status: z
.string()
.describe('Status of the job (completed, active, failed, paused)')
.optional(),
/** Current page number */
current: z.number().int().describe('Current page number').optional(),
/** Total number of pages */
total: z.number().int().describe('Total number of pages').optional(),
/** Data returned from the job (null when it is in progress) */
data: z
.array(CrawlStatusResponseObjSchema)
.describe('Data returned from the job (null when it is in progress)')
.optional(),
/** Partial documents returned as the site is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data becomes empty and the result is available in `data`. The array holds at most 50 items; the oldest item (top of the array) is removed when a new item is added. */
partial_data: z
.array(CrawlStatusResponseObjSchema)
.describe(
'Partial documents returned as the site is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data becomes empty and the result is available in `data`. The array holds at most 50 items; the oldest item (top of the array) is removed when a new item is added.'
)
.optional()
})
export type GetCrawlStatusResponse = z.infer<
typeof GetCrawlStatusResponseSchema
>
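// Polling sketch (illustrative; `client` and `jobId` are assumed to be in
// scope, and the 2s interval is an arbitrary choice). Per the schema above,
// `status` is 'active' while the job runs and `data` is only populated once
// the crawl completes.
//
//   let status: GetCrawlStatusResponse
//   do {
//     await new Promise((resolve) => setTimeout(resolve, 2000))
//     status = await client.getCrawlStatus({ jobId })
//   } while (status.status === 'active')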
export const CancelCrawlJobParamsSchema = z.object({
/** ID of the crawl job */
jobId: z.string().describe('ID of the crawl job')
})
export type CancelCrawlJobParams = z.infer<typeof CancelCrawlJobParamsSchema>
export const CancelCrawlJobResponseSchema = z.object({
/** Returns "cancelled". */
status: z.string().describe('Returns "cancelled".').optional()
})
export type CancelCrawlJobResponse = z.infer<
typeof CancelCrawlJobResponseSchema
>
}
export class FirecrawlClient extends AIFunctionsProvider {
protected readonly ky: KyInstance
protected readonly apiKey: string
protected readonly apiBaseUrl: string
constructor({
apiKey = getEnv('FIRECRAWL_API_KEY'),
apiBaseUrl = firecrawl.apiBaseUrl,
ky = defaultKy
}: {
apiKey?: string
apiBaseUrl?: string
ky?: KyInstance
} = {}) {
assert(
apiKey,
'FirecrawlClient missing required "apiKey" (defaults to "FIRECRAWL_API_KEY")'
)
super()
this.apiKey = apiKey
this.apiBaseUrl = apiBaseUrl
this.ky = ky.extend({
prefixUrl: apiBaseUrl,
headers: {
Authorization: `Bearer ${apiKey}`
}
})
}
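// Sketch: a pre-configured ky instance can be injected to tune transport
// behavior; `timeout` and `retry` are standard ky options, and 'fc-...' is
// a placeholder key.
//
//   import ky from 'ky'
//   const client = new FirecrawlClient({
//     apiKey: 'fc-...',
//     ky: ky.extend({ timeout: 60_000, retry: 2 })
//   })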
/**
* Scrape a single URL.
*/
@aiFunction({
name: 'scrape',
description: 'Scrape a single URL.',
inputSchema: firecrawl.ScrapeParamsSchema
})
async scrape(
params: firecrawl.ScrapeParams
): Promise<firecrawl.ScrapeResponse> {
return this.ky
// Note: ky requires paths without a leading slash when `prefixUrl` is set.
.post('scrape', {
json: params
})
.json<firecrawl.ScrapeResponse>()
}
/**
* Crawl multiple URLs based on options.
*/
@aiFunction({
name: 'crawl_urls',
description: 'Crawl multiple URLs based on options.',
inputSchema: firecrawl.CrawlUrlsParamsSchema
})
async crawlUrls(
params: firecrawl.CrawlUrlsParams
): Promise<firecrawl.CrawlUrlsResponse> {
return this.ky
.post('crawl', {
json: params
})
.json<firecrawl.CrawlUrlsResponse>()
}
/**
* Search for a keyword in Google, returns top page results with markdown content for each page.
*/
@aiFunction({
name: 'search_google',
description:
'Search for a keyword in Google, returns top page results with markdown content for each page.',
inputSchema: firecrawl.SearchGoogleParamsSchema
})
async searchGoogle(
params: firecrawl.SearchGoogleParams
): Promise<firecrawl.SearchGoogleResponse> {
return this.ky
.post('search', {
json: params
})
.json<firecrawl.SearchGoogleResponse>()
}
/**
* Get the status of a crawl job.
*/
@aiFunction({
name: 'get_crawl_status',
description: 'Get the status of a crawl job.',
inputSchema: firecrawl.GetCrawlStatusParamsSchema
})
async getCrawlStatus(
params: firecrawl.GetCrawlStatusParams
): Promise<firecrawl.GetCrawlStatusResponse> {
return this.ky
.get(`crawl/status/${params.jobId}`)
.json<firecrawl.GetCrawlStatusResponse>()
}
/**
* Cancel a crawl job.
*/
@aiFunction({
name: 'cancel_crawl_job',
description: 'Cancel a crawl job.',
inputSchema: firecrawl.CancelCrawlJobParamsSchema
})
async cancelCrawlJob(
params: firecrawl.CancelCrawlJobParams
): Promise<firecrawl.CancelCrawlJobResponse> {
return this.ky
.delete(`crawl/cancel/${params.jobId}`)
.json<firecrawl.CancelCrawlJobResponse>()
}
}
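/**
 * End-to-end usage sketch (illustrative, not part of the generated client;
 * assumes a top-level-await context, a valid API key, and 'fc-...' as a
 * placeholder):
 *
 *   const client = new FirecrawlClient({ apiKey: 'fc-...' })
 *
 *   // Scrape a single page and print its Markdown. `ScrapeParamsSchema.parse`
 *   // fills in the defaulted fields the inferred params type requires.
 *   const page = await client.scrape(
 *     firecrawl.ScrapeParamsSchema.parse({ url: 'https://example.com' })
 *   )
 *   console.log(page.data?.markdown)
 *
 *   // Start a crawl, then check on its progress.
 *   const job = await client.crawlUrls({ url: 'https://example.com' })
 *   if (job.id) {
 *     const status = await client.getCrawlStatus({ jobId: job.id })
 *     console.log(status.status, status.current, status.total)
 *   }
 */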