From 1c10060651d7996689f3ec10cb2afd55626ddbc9 Mon Sep 17 00:00:00 2001 From: Travis Fischer Date: Tue, 4 Jun 2024 22:58:32 -0500 Subject: [PATCH] =?UTF-8?q?=E2=98=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bin/scratch.ts | 5 +++-- src/services/firecrawl-client.ts | 38 +++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/bin/scratch.ts b/bin/scratch.ts index 3b5df8a..a90ef82 100644 --- a/bin/scratch.ts +++ b/bin/scratch.ts @@ -58,8 +58,9 @@ async function main() { const firecrawl = new FirecrawlClient() const res = await firecrawl.scrapeUrl({ - // url: 'https://www.bbc.com/news/articles/cp4475gwny1o' - url: 'https://www.firecrawl.dev' + url: 'https://www.bbc.com/news/articles/cp4475gwny1o' + // url: 'https://www.theguardian.com/technology/article/2024/jun/04/openai-google-ai-risks-letter' + // url: 'https://www.firecrawl.dev' }) console.log(JSON.stringify(res, null, 2)) diff --git a/src/services/firecrawl-client.ts b/src/services/firecrawl-client.ts index 134d1d4..dbb85b5 100644 --- a/src/services/firecrawl-client.ts +++ b/src/services/firecrawl-client.ts @@ -25,10 +25,33 @@ export namespace firecrawl { */ export interface ScrapeResponse { success: boolean - data?: any + data?: Data error?: string } + export interface Data { + content?: string + markdown?: string + html?: string + metadata: Metadata + } + + export interface Metadata { + title: string + description: string + keywords?: string + robots?: string + ogTitle?: string + ogDescription?: string + ogUrl?: string + ogImage?: string + ogLocaleAlternate?: any[] + ogSiteName?: string + sourceURL?: string + modifiedTime?: string + publishedTime?: string + } + /** * Response interface for searching operations. */ @@ -132,9 +155,18 @@ export class FirecrawlClient extends AIFunctionsProvider { } } - return this.ky - .post('v0/scrapeUrl', { json }) + const res = await this.ky + .post('v0/scrape', { json }) .json() + + if (!res.success || !res.data) return res + + if (res.data.markdown) { + delete res.data.html + delete res.data.content + } + + return res } async search(