kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
				
				
				
			feat: add jina ai reader client: JinaClient
							rodzic
							
								
									2f6aac6152
								
							
						
					
					
						commit
						32e873ae17
					
				| 
						 | 
				
			
			@ -20,7 +20,8 @@ import restoreCursor from 'restore-cursor'
 | 
			
		|||
// import { BingClient } from '../src/index.js'
 | 
			
		||||
// import { TavilyClient } from '../src/index.js'
 | 
			
		||||
// import { SocialDataClient } from '../src/index.js'
 | 
			
		||||
import { HunterClient } from '../src/index.js'
 | 
			
		||||
// import { HunterClient } from '../src/index.js'
 | 
			
		||||
import { JinaClient } from '../src/index.js'
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Scratch pad for testing.
 | 
			
		||||
| 
						 | 
				
			
			@ -125,15 +126,28 @@ async function main() {
 | 
			
		|||
  // const res = await socialData.getUserByUsername('transitive_bs')
 | 
			
		||||
  // console.log(JSON.stringify(res, null, 2))
 | 
			
		||||
 | 
			
		||||
  const hunter = new HunterClient()
 | 
			
		||||
  // const res = await hunter.emailVerifier({
 | 
			
		||||
  //   email: 'travis@transitivebullsh.it'
 | 
			
		||||
  // const hunter = new HunterClient()
 | 
			
		||||
  // // const res = await hunter.emailVerifier({
 | 
			
		||||
  // //   email: 'travis@transitivebullsh.it'
 | 
			
		||||
  // // })
 | 
			
		||||
  // const res = await hunter.emailFinder({
 | 
			
		||||
  //   domain: 'aomni.com',
 | 
			
		||||
  //   first_name: 'David',
 | 
			
		||||
  //   last_name: 'Zhang'
 | 
			
		||||
  // })
 | 
			
		||||
  const res = await hunter.emailFinder({
 | 
			
		||||
    domain: 'aomni.com',
 | 
			
		||||
    first_name: 'David',
 | 
			
		||||
    last_name: 'Zhang'
 | 
			
		||||
  // console.log(JSON.stringify(res, null, 2))
 | 
			
		||||
 | 
			
		||||
  const jina = new JinaClient()
 | 
			
		||||
  const res = await jina.readUrl({
 | 
			
		||||
    url: 'https://news.ycombinator.com'
 | 
			
		||||
    // returnFormat: 'screenshot'
 | 
			
		||||
    // json: true
 | 
			
		||||
  })
 | 
			
		||||
  // const res = await jina.search({
 | 
			
		||||
  //   query: 'trump assassination attempt',
 | 
			
		||||
  //   // returnFormat: 'screenshot',
 | 
			
		||||
  //   json: true
 | 
			
		||||
  // })
 | 
			
		||||
  console.log(JSON.stringify(res, null, 2))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -140,6 +140,7 @@ Depending on the AI SDK and tool you want to use, you'll also need to install th
 | 
			
		|||
| [Firecrawl](https://www.firecrawl.dev)                                   | `FirecrawlClient`      | Website scraping and sanitization.                                                                                                                                                                                                                             |
 | 
			
		||||
| [HackerNews](https://github.com/HackerNews/API)                          | `HackerNewsClient`     | Official HackerNews API.                                                                                                                                                                                                                                       |
 | 
			
		||||
| [Hunter](https://hunter.io)                                              | `HunterClient`         | Email finder, verifier, and enrichment.                                                                                                                                                                                                                        |
 | 
			
		||||
| [Jina](https://jina.ai/reader)                                           | `JinaClient`           | Clean URL reader and web search + URL top result reading with a generous free tier.                                                                                                                                                                            |
 | 
			
		||||
| [Midjourney](https://www.imagineapi.dev)                                 | `MidjourneyClient`     | Unofficial Midjourney client for generative images.                                                                                                                                                                                                            |
 | 
			
		||||
| [Novu](https://novu.co)                                                  | `NovuClient`           | Sending notifications (email, SMS, in-app, push, etc).                                                                                                                                                                                                         |
 | 
			
		||||
| [People Data Labs](https://www.peopledatalabs.com)                       | `PeopleDataLabsClient` | People & company data (WIP).                                                                                                                                                                                                                                   |
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,6 +6,7 @@ export * from './exa-client.js'
 | 
			
		|||
export * from './firecrawl-client.js'
 | 
			
		||||
export * from './hacker-news-client.js'
 | 
			
		||||
export * from './hunter-client.js'
 | 
			
		||||
export * from './jina-client.js'
 | 
			
		||||
export * from './midjourney-client.js'
 | 
			
		||||
export * from './novu-client.js'
 | 
			
		||||
export * from './people-data-labs-client.js'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,251 @@
 | 
			
		|||
import defaultKy, { type KyInstance } from 'ky'
 | 
			
		||||
import pThrottle from 'p-throttle'
 | 
			
		||||
import { z } from 'zod'
 | 
			
		||||
 | 
			
		||||
import { aiFunction, AIFunctionsProvider } from '../fns.js'
 | 
			
		||||
import { getEnv, pruneNullOrUndefined, throttleKy } from '../utils.js'
 | 
			
		||||
 | 
			
		||||
export namespace jina {
  /** Values accepted for the `returnFormat` option of the reader. */
  export const ReaderFormatSchema = z.enum([
    'text',
    'html',
    'markdown',
    'screenshot'
  ])
  export type ReaderFormat = z.infer<typeof ReaderFormatSchema>

  // Option fields that the reader and search schemas declare identically.
  // Kept private to the namespace; each field has its own schema instance.
  const sharedOptionFields = {
    withGeneratedAlt: z.boolean().optional(),
    withLinksSummary: z.boolean().optional(),
    withImagesSummary: z.boolean().optional(),
    setCookie: z.string().optional(),
    proxyUrl: z.string().optional(),
    noCache: z.boolean().optional()
  }

  /** Input accepted by `JinaClient.readUrl` when called with an object. */
  export const ReaderOptionsSchema = z.object({
    url: z.string().describe('URL to read'),
    timeout: z.number().optional().describe('Optional timeout in seconds'),
    targetSelector: z
      .string()
      .optional()
      .describe(
        "Optional CSS selector to focus on a more specific part of the page. Useful when your desired content doesn't show under the default settings."
      ),
    waitForSelector: z
      .string()
      .optional()
      .describe(
        "Optional CSS selector to wait for before returning. Useful when your desired content doesn't show under the default settings."
      ),
    ...sharedOptionFields,
    returnFormat: ReaderFormatSchema.optional(),
    json: z.boolean().optional()
  })
  export type ReaderOptions = z.infer<typeof ReaderOptionsSchema>

  /**
   * Input accepted by `JinaClient.search` when called with an object.
   *
   * Same as the reader options minus the page-specific fields, plus `site`;
   * `returnFormat` may not be `'screenshot'`.
   */
  export const SearchOptionsSchema = z.object({
    query: z.string().describe('Search query'),
    site: z
      .string()
      .optional()
      .describe(
        'Returns the search results only from the specified website or domain. By default it searches the entire web.'
      ),
    ...sharedOptionFields,
    returnFormat: ReaderFormatSchema.exclude(['screenshot']).optional(),
    json: z.boolean().optional()
  })
  export type SearchOptions = z.infer<typeof SearchOptionsSchema>

  /** One page as described in a JSON response. */
  export interface ReaderData {
    url: string
    title: string
    content: string
    description?: string
    publishedTime?: string
  }

  /** Envelope shared by every JSON response type declared here. */
  export interface JinaResponse {
    code: number
    status: number
    data: unknown
  }

  /** JSON response of `readUrl` for non-screenshot formats. */
  export interface ReaderResponse extends JinaResponse {
    data: ReaderData
  }

  /** JSON response of `readUrl` when `returnFormat` is `'screenshot'`. */
  export interface ReaderResponseScreenshot extends JinaResponse {
    data: {
      screenshotUrl: string
    }
  }

  /** JSON response of `search`: one `ReaderData` entry per result. */
  export interface SearchResponse extends JinaResponse {
    data: ReaderData[]
  }
}
 | 
			
		||||
 | 
			
		||||
/**
 * LLM-friendly URL reader and search client by Jina AI.
 *
 * - Includes a very generous free tier.
 * - Does not support "stream mode".
 * - Results default to markdown text format.
 * - To return JSON (especially useful for `search`), set `json: true` in the
 *   options.
 *
 * Reader requests go to `https://r.jina.ai` and search requests go to
 * `https://s.jina.ai`; each host gets its own (optional) throttle.
 *
 * @see https://jina.ai/reader
 */
export class JinaClient extends AIFunctionsProvider {
  protected readonly kyReader: KyInstance
  protected readonly kySearch: KyInstance
  protected readonly apiKey?: string

  /**
   * @param opts.apiKey - Optional API key; defaults to the `JINA_API_KEY`
   *   environment variable. When set, it is sent as a bearer token and the
   *   higher throttle limits are used.
   * @param opts.throttle - Whether to throttle outgoing requests (default
   *   `true`).
   * @param opts.ky - Base `ky` instance to extend (default: the `ky` export).
   */
  constructor({
    apiKey = getEnv('JINA_API_KEY'),
    throttle = true,
    ky = defaultKy
  }: {
    apiKey?: string
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    super()

    this.apiKey = apiKey

    if (apiKey) {
      ky = ky.extend({ headers: { Authorization: `Bearer ${apiKey}` } })
    }

    // Jina documents its rate limits in requests per minute (see
    // https://jina.ai/reader), so the throttle window is one minute.
    const rateLimitIntervalMs = 60 * 1000

    const throttledKyReader = throttle
      ? throttleKy(
          ky,
          pThrottle({
            limit: apiKey ? 200 : 20,
            interval: rateLimitIntervalMs
          })
        )
      : ky
    this.kyReader = throttledKyReader.extend({ prefixUrl: 'https://r.jina.ai' })

    const throttledKySearch = throttle
      ? throttleKy(
          ky,
          pThrottle({
            limit: apiKey ? 40 : 5,
            interval: rateLimitIntervalMs
          })
        )
      : ky
    this.kySearch = throttledKySearch.extend({ prefixUrl: 'https://s.jina.ai' })
  }

  /**
   * Reads a URL through the reader endpoint.
   *
   * @param urlOrOptions - Either the URL itself, or a `jina.ReaderOptions`
   *   object (validated against `jina.ReaderOptionsSchema` after null and
   *   undefined entries are pruned).
   * @returns The parsed JSON body when `json` is set, an `ArrayBuffer` when
   *   `returnFormat` is `'screenshot'` without `json`, and the response text
   *   otherwise.
   */
  @aiFunction({
    name: 'readUrl',
    description:
      "Reads the contents of the given URL and returns it's main contents in a clean, LLM-friendly format.",
    inputSchema: jina.ReaderOptionsSchema
  })
  async readUrl<T extends string | jina.ReaderOptions>(
    urlOrOptions: T
  ): Promise<
    T extends string
      ? string
      : T extends jina.ReaderOptions
        ? T['json'] extends true
          ? T['returnFormat'] extends 'screenshot'
            ? jina.ReaderResponseScreenshot
            : jina.ReaderResponse
          : T['returnFormat'] extends 'screenshot'
            ? ArrayBuffer
            : string
        : never
  > {
    const { url, ...opts } =
      typeof urlOrOptions === 'string'
        ? { url: urlOrOptions }
        : jina.ReaderOptionsSchema.parse(pruneNullOrUndefined(urlOrOptions))
    const headers = this._getHeadersFromOptions(opts)

    // The target URL is appended verbatim to the reader prefix URL.
    const res = this.kyReader.get(url, { headers })

    if (opts.json) {
      return res.json<jina.ReaderResponse>() as any
    } else if (opts.returnFormat === 'screenshot') {
      return res.arrayBuffer() as any
    } else {
      return res.text() as any
    }
  }

  /**
   * Runs a web search through the search endpoint.
   *
   * @param queryOrOptions - Either the query itself, or a
   *   `jina.SearchOptions` object (validated against
   *   `jina.SearchOptionsSchema` after null and undefined entries are pruned).
   * @returns The parsed JSON body when `json` is set, otherwise the response
   *   text.
   */
  @aiFunction({
    name: 'search',
    description:
      'Searches the web for the given query and returns the top-5 results including their page contents in a clean, LLM-friendly format.',
    inputSchema: jina.SearchOptionsSchema
  })
  async search<T extends string | jina.SearchOptions>(
    queryOrOptions: T
  ): Promise<
    T extends string
      ? string
      : T extends jina.SearchOptions
        ? T['json'] extends true
          ? jina.SearchResponse
          : string
        : never
  > {
    const { query, ...opts } =
      typeof queryOrOptions === 'string'
        ? { query: queryOrOptions }
        : jina.SearchOptionsSchema.parse(pruneNullOrUndefined(queryOrOptions))
    const headers = this._getHeadersFromOptions(opts)

    // The query becomes a URL path segment, so it must be percent-encoded:
    // an unescaped `?` or `#` would otherwise cut the query short, and a
    // leading `/` is rejected by ky when `prefixUrl` is in use.
    const res = this.kySearch.get(encodeURIComponent(query), { headers })

    if (opts.json) {
      return res.json<jina.SearchResponse>() as any
    } else {
      return res.text() as any
    }
  }

  /**
   * Converts reader / search options into request headers.
   *
   * Every option except `json` is sent under the header name listed in
   * `headerMap`, with its value stringified. `json` instead selects the
   * `accept` header: `application/json` when truthy, otherwise `text/plain`
   * unless a screenshot was requested (in which case no `accept` is set).
   *
   * @param options - Options with the `url` / `query` field already removed.
   * @returns A plain header-name to header-value record.
   */
  protected _getHeadersFromOptions(
    options: Record<string, string | boolean | number>
  ) {
    const { json, ...rest } = options

    const headerMap: Record<string, string> = {
      site: 'site',
      timeout: 'x-timeout',
      targetSelector: 'x-target-selector',
      waitForSelector: 'x-wait-for-selector',
      withGeneratedAlt: 'x-with-generated-alt',
      withLinksSummary: 'x-with-links-summary',
      withImagesSummary: 'x-with-images-summary',
      setCookie: 'x-set-cookie',
      proxyUrl: 'x-proxy-url',
      noCache: 'x-no-cache',
      returnFormat: 'x-return-format'
    }

    const headers: Record<string, string> = {}
    for (const [key, value] of Object.entries(rest)) {
      const headerName = headerMap[key]

      // Skip options that have no header mapping or no value, rather than
      // sending a header literally named or valued "undefined".
      if (headerName === undefined || value == null) {
        continue
      }

      headers[headerName] = String(value)
    }

    if (json) {
      headers.accept = 'application/json'
    } else if (options.returnFormat !== 'screenshot') {
      headers.accept = 'text/plain'
    }

    return headers
  }
}
 | 
			
		||||
		Ładowanie…
	
		Reference in New Issue