diff --git a/legacy/bin/scratch.ts b/legacy/bin/scratch.ts
index 119781a7..e77ed2d6 100644
--- a/legacy/bin/scratch.ts
+++ b/legacy/bin/scratch.ts
@@ -5,7 +5,8 @@ import { gracefulExit } from 'exit-hook'
 import restoreCursor from 'restore-cursor'
 
 // import { ClearbitClient } from '../src/index.js'
-import { ProxycurlClient } from '../src/services/proxycurl-client.js'
+// import { ProxycurlClient } from '../src/services/proxycurl-client.js'
+import { WikipediaClient } from '../src/services/wikipedia-client.js'
 
 /**
  * Scratch pad for testing.
@@ -19,10 +20,16 @@ async function main() {
   // })
   // console.log(JSON.stringify(res, null, 2))
 
-  const proxycurl = new ProxycurlClient()
-  const res = await proxycurl.getLinkedInPerson({
-    linkedin_profile_url: 'https://linkedin.com/in/fisch2'
-    // personal_email: 'fisch0920@gmail.com'
+  // const proxycurl = new ProxycurlClient()
+  // const res = await proxycurl.getLinkedInPerson({
+  //   linkedin_profile_url: 'https://linkedin.com/in/fisch2'
+  //   // personal_email: 'fisch0920@gmail.com'
+  // })
+  // console.log(JSON.stringify(res, null, 2))
+
+  const wikipedia = new WikipediaClient()
+  const res = await wikipedia.getPageSummary({
+    title: 'Naruto_(TV_series)'
   })
   console.log(JSON.stringify(res, null, 2))
 
diff --git a/legacy/readme.md b/legacy/readme.md
index 89345bcc..ee4e9d54 100644
--- a/legacy/readme.md
+++ b/legacy/readme.md
@@ -33,6 +33,7 @@
 - serper
 - twitter
 - weatherapi
+- wikipedia
 
 ## TODO
 
@@ -45,7 +46,6 @@
   - walter
 - services
   - wolfram alpha
-  - wikipedia
   - midjourney
   - unstructured
   - pull from [langchain](https://github.com/langchain-ai/langchainjs/tree/main/langchain)
diff --git a/legacy/src/services/exa-client.ts b/legacy/src/services/exa-client.ts
index 8bda881a..a516fe2d 100644
--- a/legacy/src/services/exa-client.ts
+++ b/legacy/src/services/exa-client.ts
@@ -202,8 +202,6 @@ export class ExaClient {
   /**
    * Finds similar links to the provided URL and returns the contents of the
    * documents.
-   *
-   * @param {string} url - The URL for which to find similar links.
    */
   async findSimilarAndContents<
     T extends exa.ContentsOptions = exa.ContentsOptions
@@ -232,8 +230,6 @@ export class ExaClient {
 
   /**
    * Retrieves contents of documents based on a list of document IDs.
-   *
-   * @param {string | string[] | SearchResult[]} ids - An array of document IDs.
    */
   async getContents({
     ids,
diff --git a/legacy/src/services/index.ts b/legacy/src/services/index.ts
index 06daec68..d477b668 100644
--- a/legacy/src/services/index.ts
+++ b/legacy/src/services/index.ts
@@ -13,3 +13,4 @@ export * from './serpapi-client.js'
 export * from './serper-client.js'
 export * from './twitter-client.js'
 export * from './weather-client.js'
+export * from './wikipedia-client.js'
diff --git a/legacy/src/services/wikipedia-client.ts b/legacy/src/services/wikipedia-client.ts
new file mode 100644
index 00000000..7dfd591f
--- /dev/null
+++ b/legacy/src/services/wikipedia-client.ts
@@ -0,0 +1,176 @@
+import defaultKy, { type KyInstance } from 'ky'
+import pThrottle from 'p-throttle'
+
+import { assert, getEnv, throttleKy } from '../utils.js'
+
+export namespace wikipedia {
+  // Only allow 200 requests per second
+  export const throttle = pThrottle({
+    limit: 200,
+    interval: 1000
+  })
+
+  export interface SearchOptions {
+    query: string
+    limit?: number
+  }
+
+  export interface PageSearchResponse {
+    pages: Page[]
+  }
+
+  export interface Page {
+    id: number
+    key: string
+    title: string
+    matched_title: null
+    excerpt: string
+    description: null | string
+    thumbnail: Thumbnail | null
+  }
+
+  export interface Thumbnail {
+    url: string
+    width: number
+    height: number
+    mimetype: string
+    duration: null
+  }
+
+  export interface PageSummaryOptions {
+    title: string
+    redirect?: boolean
+    acceptLanguage?: string
+  }
+
+  export interface PageSummary {
+    ns?: number
+    index?: number
+    type: string
+    title: string
+    displaytitle: string
+    namespace: { id: number; text: string }
+    wikibase_item: string
+    titles: { canonical: string; normalized: string; display: string }
+    pageid: number
+    thumbnail: {
+      source: string
+      width: number
+      height: number
+    }
+    originalimage: {
+      source: string
+      width: number
+      height: number
+    }
+    lang: string
+    dir: string
+    revision: string
+    tid: string
+    timestamp: string
+    description: string
+    description_source: string
+    content_urls: {
+      desktop: {
+        page: string
+        revisions: string
+        edit: string
+        talk: string
+      }
+      mobile: {
+        page: string
+        revisions: string
+        edit: string
+        talk: string
+      }
+    }
+    extract: string
+    extract_html: string
+    normalizedtitle?: string
+    coordinates?: {
+      lat: number
+      lon: number
+    }
+  }
+}
+
+/**
+ * Minimal client for Wikipedia's public REST APIs.
+ *
+ * Requests are throttled by default (see `wikipedia.throttle`) and carry an
+ * `api-user-agent` header identifying the caller.
+ */
+export class WikipediaClient {
+  readonly apiBaseUrl: string
+  readonly apiUserAgent: string
+  readonly ky: KyInstance
+
+  constructor({
+    apiBaseUrl = getEnv('WIKIPEDIA_API_BASE_URL') ??
+      'https://en.wikipedia.org/api/rest_v1',
+    apiUserAgent = getEnv('WIKIPEDIA_API_USER_AGENT') ??
+      'Agentic (https://github.com/transitive-bullshit/agentic)',
+    throttle = true,
+    ky = defaultKy
+  }: {
+    apiBaseUrl?: string
+    apiUserAgent?: string
+    throttle?: boolean
+    ky?: KyInstance
+  } = {}) {
+    assert(apiBaseUrl, 'WikipediaClient missing required "apiBaseUrl"')
+    assert(apiUserAgent, 'WikipediaClient missing required "apiUserAgent"')
+
+    this.apiBaseUrl = apiBaseUrl
+    this.apiUserAgent = apiUserAgent
+
+    const throttledKy = throttle ? throttleKy(ky, wikipedia.throttle) : ky
+
+    this.ky = throttledKy.extend({
+      headers: {
+        'api-user-agent': apiUserAgent
+      }
+    })
+  }
+
+  /**
+   * Searches Wikipedia pages matching `query`.
+   *
+   * Always targets the English-language MediaWiki REST search endpoint; the
+   * configured `apiBaseUrl` is not used here.
+   */
+  async search({ query, ...opts }: wikipedia.SearchOptions) {
+    return (
+      // https://www.mediawiki.org/wiki/API:REST_API
+      this.ky
+        .get('https://en.wikipedia.org/w/rest.php/v1/search/page', {
+          searchParams: { q: query, ...opts }
+        })
+        .json<wikipedia.PageSearchResponse>()
+    )
+  }
+
+  /**
+   * Fetches the summary of the page named `title` from `apiBaseUrl`.
+   *
+   * The title is percent-encoded so characters such as `/` or `?` stay part
+   * of the title instead of changing the request path or query string.
+   */
+  async getPageSummary({
+    title,
+    acceptLanguage = 'en-us',
+    redirect = true,
+    ...opts
+  }: wikipedia.PageSummaryOptions) {
+    // https://en.wikipedia.org/api/rest_v1/
+    return this.ky
+      .get(`page/summary/${encodeURIComponent(title)}`, {
+        prefixUrl: this.apiBaseUrl,
+        searchParams: { redirect, ...opts },
+        headers: {
+          'accept-language': acceptLanguage
+        }
+      })
+      .json<wikipedia.PageSummary>()
+  }
+}
diff --git a/legacy/tsconfig.json b/legacy/tsconfig.json
index 07b69730..4066d276 100644
--- a/legacy/tsconfig.json
+++ b/legacy/tsconfig.json
@@ -16,7 +16,6 @@
     // "emitDecoratorMetadata": true,
 
     "strict": true,
-    "strictNullChecks": true,
     "noUncheckedIndexedAccess": true,
     "forceConsistentCasingInFileNames": true,
 
diff --git a/legacy/tsup.config.ts b/legacy/tsup.config.ts
index bbb2d78b..3710148e 100644
--- a/legacy/tsup.config.ts
+++ b/legacy/tsup.config.ts
@@ -4,7 +4,7 @@ export default defineConfig([
   {
     entry: ['src/index.ts'],
     outDir: 'dist',
-    target: 'node18',
+    target: 'node22',
     platform: 'node',
     format: ['esm'],
     splitting: false,