diff --git a/bin/scratch.ts b/bin/scratch.ts index e21109a..7cf5df3 100644 --- a/bin/scratch.ts +++ b/bin/scratch.ts @@ -7,7 +7,8 @@ import restoreCursor from 'restore-cursor' // import { ClearbitClient } from '../src/index.js' // import { ProxycurlClient } from '../src/services/proxycurl-client.js' // import { WikipediaClient } from '../src/index.js' -import { PerigonClient } from '../src/index.js' +// import { PerigonClient } from '../src/index.js' +import { FirecrawlClient } from '../src/index.js' /** * Scratch pad for testing. @@ -41,18 +42,24 @@ async function main() { // }) // console.log(JSON.stringify(res, null, 2)) - const perigon = new PerigonClient() - const res = await perigon.searchArticles({ - q: 'AI agents AND startup', - sourceGroup: 'top50tech' - }) + // const perigon = new PerigonClient() + // const res = await perigon.searchArticles({ + // q: 'AI agents AND startup', + // sourceGroup: 'top50tech' + // }) + // console.log(JSON.stringify(res, null, 2)) + const firecrawl = new FirecrawlClient() + const res = await firecrawl.scrapeUrl({ + // url: 'https://www.bbc.com/news/articles/cp4475gwny1o' + url: 'https://www.firecrawl.dev' + }) console.log(JSON.stringify(res, null, 2)) } try { await main() } catch (err) { - console.error('unexpected error', err) + console.error('error', err) process.exit(1) } diff --git a/examples/openai/weather.ts b/examples/openai/weather.ts index 853a985..6b11ccb 100644 --- a/examples/openai/weather.ts +++ b/examples/openai/weather.ts @@ -2,9 +2,8 @@ import 'dotenv/config' import OpenAI from 'openai' -import { default as assert } from 'tiny-invariant' -import { WeatherClient } from '../../src/index.js' +import { assert, WeatherClient } from '../../src/index.js' async function main() { const weather = new WeatherClient() diff --git a/package.json b/package.json index 1531195..45fbe1c 100644 --- a/package.json +++ b/package.json @@ -63,7 +63,6 @@ "jsonrepair": "^3.6.1", "ky": "^1.2.4", "p-throttle": "^6.1.0", - "tiny-invariant": "^1.3.3", "twitter-api-sdk": "^1.2.1", "type-fest": "^4.18.3", "zod": "^3.23.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5f1eacd..7b9efbd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -26,9 +26,6 @@ importers: p-throttle: specifier: ^6.1.0 version: 6.1.0 - tiny-invariant: - specifier: ^1.3.3 - version: 1.3.3 twitter-api-sdk: specifier: ^1.2.1 version: 1.2.1 @@ -75,9 +72,6 @@ importers: eslint: specifier: ^8.57.0 version: 8.57.0 - exit-hook: - specifier: ^4.0.0 - version: 4.0.0 husky: specifier: ^9.0.11 version: 9.0.11 @@ -4060,9 +4054,6 @@ packages: tiktoken@1.0.15: resolution: {integrity: sha512-sCsrq/vMWUSEW29CJLNmPvWxlVp7yh2tlkAjpJltIKqp5CKf98ZNpdeHRmAlPVFlGEbswDc6SmI8vz64W/qErw==} - tiny-invariant@1.3.3: - resolution: {integrity: sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==} - tinybench@2.8.0: resolution: {integrity: sha512-1/eK7zUnIklz4JUUlL+658n58XO2hHLQfSk1Zf2LKieUjxidN16eKFEoDEfjHc3ohofSSqK3X5yO6VGb6iW8Lw==} @@ -8681,8 +8672,6 @@ snapshots: tiktoken@1.0.15: {} - tiny-invariant@1.3.3: {} - tinybench@2.8.0: {} tinypool@0.9.0: {} diff --git a/readme.md b/readme.md index 6732ff2..9ec9ddb 100644 --- a/readme.md +++ b/readme.md @@ -50,6 +50,11 @@ - instructor-js - TODO - services + - calculator + - e2b + - search-and-scrape + - replicate + - huggingface - wolfram alpha - midjourney - unstructured diff --git a/src/assert.ts b/src/assert.ts new file mode 100644 index 0000000..01d9253 --- /dev/null +++ b/src/assert.ts @@ -0,0 +1,32 @@ +/** + * Slightly modified version of [tiny-invariant](https://github.com/alexreardon/tiny-invariant). + * + * `assert` is used to [assert](https://www.typescriptlang.org/docs/handbook/release-notes/typescript-3-7.html#assertion-functions) that the `condition` is [truthy](https://github.com/getify/You-Dont-Know-JS/blob/bdbe570600d4e1107d0b131787903ca1c9ec8140/up%20%26%20going/ch2.md#truthy--falsy). + * + * 💥 `assert` will `throw` an `Error` if the `condition` is [falsey](https://github.com/getify/You-Dont-Know-JS/blob/bdbe570600d4e1107d0b131787903ca1c9ec8140/up%20%26%20going/ch2.md#truthy--falsy) + * + * @example + * + * ```ts + * const value: Person | null = { name: 'Alex' }; + * assert(value, 'Expected value to be a person'); + * // type of `value`` has been narrowed to `Person` + * ``` + */ +export function assert( + condition: any, + /** + * Can provide a string, or a function that returns a string for cases where + * the message takes a fair amount of effort to compute. + */ + message?: string | (() => string) +): asserts condition { + if (condition) { + return + } + + const providedMessage: string | undefined = + typeof message === 'function' ? message() : message + + throw new Error(providedMessage ?? 'Assertion failed') +} diff --git a/src/services/firecrawl-client.ts b/src/services/firecrawl-client.ts index 970b979..cd7157d 100644 --- a/src/services/firecrawl-client.ts +++ b/src/services/firecrawl-client.ts @@ -1,15 +1,17 @@ import defaultKy, { type KyInstance } from 'ky' import z from 'zod' +import { aiFunction, AIFunctionsProvider } from '../fns.js' import { assert, delay, getEnv } from '../utils.js' import { zodToJsonSchema } from '../zod-to-json-schema.js' +// TODO: Deprioritizing this client for now because the API doesn't seem to be stable. + export namespace firecrawl { /** * Generic parameter interface. */ export interface Params { - [key: string]: any extractorOptions?: { extractionSchema: z.ZodSchema | any mode?: 'llm-extraction' @@ -59,8 +61,9 @@ export namespace firecrawl { /** * @see https://www.firecrawl.dev + * @see https://github.com/mendableai/firecrawl */ -export class FirecrawlClient { +export class FirecrawlClient extends AIFunctionsProvider { readonly ky: KyInstance readonly apiKey: string readonly apiBaseUrl: string @@ -69,10 +72,12 @@ export class FirecrawlClient { apiKey = getEnv('FIRECRAWL_API_KEY'), apiBaseUrl = getEnv('FIRECRAWL_API_BASE_URL') ?? 'https://api.firecrawl.dev', + timeoutMs = 60_000, ky = defaultKy }: { apiKey?: string apiBaseUrl?: string + timeoutMs?: number ky?: KyInstance } = {}) { assert( @@ -83,18 +88,27 @@ export class FirecrawlClient { apiBaseUrl, 'FirecrawlClient missing required "apiBaseUrl" (defaults to "FIRECRAWL_API_BASE_URL")' ) + super() this.apiKey = apiKey this.apiBaseUrl = apiBaseUrl this.ky = ky.extend({ prefixUrl: apiBaseUrl, + timeout: timeoutMs, headers: { Authorization: `Bearer ${this.apiKey}` } }) } + @aiFunction({ + name: 'firecrawl_scrape_url', + description: 'Scrape the contents of a URL.', + inputSchema: z.object({ + url: z.string().url().describe('The URL to scrape.') + }) + }) async scrapeUrl( opts: { url: string @@ -173,7 +187,7 @@ export class FirecrawlClient { async waitForCrawlJob({ jobId, - timeoutMs = 30_000 + timeoutMs = 60_000 }: { jobId: string timeoutMs?: number diff --git a/src/services/people-data-labs-client.ts b/src/services/people-data-labs-client.ts index 4e74c79..dc980ee 100644 --- a/src/services/people-data-labs-client.ts +++ b/src/services/people-data-labs-client.ts @@ -3,12 +3,25 @@ import pThrottle from 'p-throttle' import { assert, getEnv, throttleKy } from '../utils.js' +/** + * TODO: I'm holding off on converting this client to an `AIFunctionsProvider` + * because it seems to be significantly more expensive than other data sources, + * and I'm not sure if it's worth the cost. + */ + export namespace peopledatalabs { export const BASE_URL = 'https://api.peopledatalabs.com/v5/' - // Allow up to 20 requests per minute by default. - export const throttle = pThrottle({ - limit: 20, + // Allow up to 10 requests per minute. + export const throttle10PerMin = pThrottle({ + limit: 10, + interval: 60 * 1000, + strict: true + }) + + // Allow up to 100 requests per minute. + export const throttle100PerMin = pThrottle({ + limit: 100, interval: 60 * 1000, strict: true }) @@ -431,6 +444,11 @@ export namespace peopledatalabs { } } +/** + * People & Company Data + * + * @see https://www.peopledatalabs.com + */ export class PeopleDataLabsClient { readonly ky: KyInstance readonly apiKey: string @@ -457,13 +475,15 @@ export class PeopleDataLabsClient { this.apiKey = apiKey this.apiBaseUrl = apiBaseUrl - const throttledKy = throttle ? throttleKy(ky, peopledatalabs.throttle) : ky + const throttledKy = throttle + ? throttleKy(ky, peopledatalabs.throttle10PerMin) + : ky this.ky = throttledKy.extend({ prefixUrl: apiBaseUrl, timeout: timeoutMs, headers: { - 'X-Api-Key': `${this.apiKey}` + 'x-api-key': `${this.apiKey}` } }) } diff --git a/src/utils.ts b/src/utils.ts index b080d1d..e6c4bc4 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,7 +1,7 @@ import type * as types from './types.js' +export { assert } from './assert.js' export { default as delay } from 'delay' -export { default as assert } from 'tiny-invariant' /** * From `inputObj`, create a new object that does not include `keys`.