diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 0000000..1fcb152 --- /dev/null +++ b/.eslintignore @@ -0,0 +1 @@ +out diff --git a/.gitignore b/.gitignore index 29026e3..b9ce495 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ next-env.d.ts .env old/ +out/ diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..1fcb152 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +out diff --git a/bin/scratch.ts b/bin/scratch.ts index d9a4cdd..25acf35 100644 --- a/bin/scratch.ts +++ b/bin/scratch.ts @@ -3,25 +3,7 @@ import 'dotenv/config' import restoreCursor from 'restore-cursor' -// import { SearxngClient } from '../src/services/searxng-client' -// import { ClearbitClient } from '../src/index' -// import { ProxycurlClient } from '../src/services/proxycurl-client' -// import { WikipediaClient } from '../src/index' -// import { PerigonClient } from '../src/index' -// import { FirecrawlClient } from '../src/index' -// import { ExaClient } from '../src/index' -// import { DiffbotClient } from '../src/index' -// import { WolframAlphaClient } from '../src/index' -// import { -// createTwitterV2Client, -// TwitterClient -// } from '../src/services/twitter/index' -// import { MidjourneyClient } from '../src/index' -// import { BingClient } from '../src/index' -// import { TavilyClient } from '../src/index' -// import { SocialDataClient } from '../src/index' -// import { HunterClient } from '../src/index' -import { JinaClient } from '../src/index' +import { WikipediaClient } from '../src' /** * Scratch pad for testing. 
@@ -137,17 +119,27 @@ async function main() { // }) // console.log(JSON.stringify(res, null, 2)) - const jina = new JinaClient() - const res = await jina.readUrl({ - url: 'https://news.ycombinator.com' - // returnFormat: 'screenshot' - // json: true - }) + // const jina = new JinaClient() + // const res = await jina.readUrl({ + // url: 'https://news.ycombinator.com' + // // returnFormat: 'screenshot' + // // json: true + // }) // const res = await jina.search({ // query: 'trump assassination attempt', // // returnFormat: 'screenshot', // json: true // }) + + // const serper = new SerpAPIClient() + // const res = await serper.search({ + // q: 'elon musk' + // }) + const wikipedia = new WikipediaClient() + const res = await wikipedia.getPageSummary({ + title: 'Elon_musk' + }) + console.log(JSON.stringify(res, null, 2)) } diff --git a/package.json b/package.json index bb3783b..f962418 100644 --- a/package.json +++ b/package.json @@ -131,7 +131,8 @@ "tsx": "^4.16.2", "twitter-api-sdk": "^1.2.1", "typescript": "^5.5.4", - "vitest": "2.0.4" + "vitest": "2.0.4", + "wikibase-sdk": "^10.0.2" }, "peerDependencies": { "@dexaai/dexter": "^2.0.3", @@ -143,7 +144,8 @@ "llamaindex": "^0.3.16", "mathjs": "^13.0.0", "octokit": "^4.0.2", - "twitter-api-sdk": "^1.2.1" + "twitter-api-sdk": "^1.2.1", + "wikibase-sdk": "^10.0.2" }, "peerDependenciesMeta": { "@dexaai/dexter": { @@ -175,6 +177,9 @@ }, "twitter-api-sdk": { "optional": true + }, + "wikibase-sdk": { + "optional": true } }, "lint-staged": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5b6c113..a444141 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -141,6 +141,9 @@ importers: vitest: specifier: 2.0.4 version: 2.0.4(@types/node@22.0.0) + wikibase-sdk: + specifier: ^10.0.2 + version: 10.0.2 examples: dependencies: @@ -6444,6 +6447,10 @@ packages: resolution: {integrity: sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==} engines: {node: '>=18'} + wikibase-sdk@10.0.2: + 
resolution: {integrity: sha512-4J1efmQU9oUC66BtqJkiqvLNtF1XVOvPKfnHMFfyyRcAHLTTeXnh+lDFIDiyI2sbNt2Q7oa4UlsxDB3ARK4CJA==} + engines: {node: '>= 12.0.0'} + wikipedia@2.1.2: resolution: {integrity: sha512-RAYaMpXC9/E873RaSEtlEa8dXK4e0p5k98GKOd210MtkE5emm6fcnwD+N6ZA4cuffjDWagvhaQKtp/mGp2BOVQ==} engines: {node: '>=10'} @@ -14477,6 +14484,8 @@ snapshots: dependencies: string-width: 7.2.0 + wikibase-sdk@10.0.2: {} + wikipedia@2.1.2: dependencies: axios: 1.7.2 diff --git a/readme.md b/readme.md index db68052..87f92a1 100644 --- a/readme.md +++ b/readme.md @@ -158,6 +158,7 @@ Depending on the AI SDK and tool you want to use, you'll also need to install th | [Twilio](https://www.twilio.com/docs/conversations/api) | `TwilioClient` | Twilio conversation API to send and receive SMS messages. | | [Twitter](https://developer.x.com/en/docs/twitter-api) | `TwitterClient` | Basic Twitter API methods for fetching users, tweets, and searching recent tweets. Includes support for plan-aware rate-limiting. Uses [Nango](https://www.nango.dev) for OAuth support. | | [WeatherAPI](https://www.weatherapi.com) | `WeatherClient` | Basic access to current weather data based on location. | +| [Wikidata](https://www.wikidata.org/wiki/Wikidata:Data_access) | `WikidataClient` | Basic Wikidata client. | | [Wikipedia](https://www.mediawiki.org/wiki/API) | `WikipediaClient` | Wikipedia page search and summaries. | | [Wolfram Alpha](https://products.wolframalpha.com/llm-api/documentation) | `WolframAlphaClient` | Wolfram Alpha LLM API client for answering computational, mathematical, and scientific questions. 
|
diff --git a/src/services/index.ts b/src/services/index.ts
index 30dc4c4..2244fdf 100644
--- a/src/services/index.ts
+++ b/src/services/index.ts
@@ -24,5 +24,6 @@ export * from './social-data-client'
 export * from './tavily-client'
 export * from './twilio-client'
 export * from './weather-client'
+export * from './wikidata-client'
 export * from './wikipedia-client'
 export * from './wolfram-alpha-client'
diff --git a/src/services/wikidata-client.ts b/src/services/wikidata-client.ts
new file mode 100644
index 0000000..48148dc
--- /dev/null
+++ b/src/services/wikidata-client.ts
@@ -0,0 +1,154 @@
+import type * as wikibase from 'wikibase-sdk'
+import defaultKy, { type KyInstance } from 'ky'
+import pThrottle from 'p-throttle'
+import wdk from 'wikibase-sdk/wikidata.org'
+
+import { AIFunctionsProvider } from '../fns'
+import { assert, getEnv, throttleKy } from '../utils'
+
+export namespace wikidata {
+  // Allow up to 200 requests per second by default.
+  export const throttle = pThrottle({
+    limit: 200,
+    interval: 1000
+  })
+
+  /** Simplified entities keyed by entity id. */
+  export type SimplifiedEntityMap = Record<string, SimplifiedEntity>
+
+  /** Shape this client returns after running `wdk.simplify.entities`. */
+  export interface SimplifiedEntity {
+    id: string
+    type: string
+    claims: Claims
+    modified: string
+    labels?: Descriptions
+    descriptions?: Descriptions
+    aliases?: any
+    sitelinks?: Sitelinks
+  }
+
+  export interface Claims {
+    [key: string]: Claim[]
+  }
+
+  // NOTE(review): qualifier / reference value types are left as `unknown`;
+  // tighten once the simplified output shape is confirmed.
+  export interface Claim {
+    value: string
+    qualifiers: Record<string, unknown>
+    references: Record<string, unknown>[]
+  }
+
+  // NOTE(review): presumably keyed by language code / site id -- confirm.
+  export type Descriptions = Record<string, string>
+  export type Sitelinks = Record<string, string>
+}
+
+/**
+ * Basic Wikidata client.
+ *
+ * @see https://github.com/maxlath/wikibase-sdk
+ *
+ * TODO: support any wikibase instance
+ */
+export class WikidataClient extends AIFunctionsProvider {
+  protected readonly ky: KyInstance
+  protected readonly apiUserAgent: string
+
+  /**
+   * @param opts.apiUserAgent - sent as the `user-agent` header on every
+   * request; defaults to the `WIKIDATA_API_USER_AGENT` env var, then to a
+   * built-in Agentic string.
+   * @param opts.throttle - when true (the default), wraps `ky` with
+   * `wikidata.throttle`.
+   * @param opts.ky - ky instance to extend; defaults to the stock export.
+   * @param opts.apiBaseUrl - declared in the options type but not read by
+   * this constructor yet.
+   */
+  constructor({
+    apiUserAgent = getEnv('WIKIDATA_API_USER_AGENT') ??
+      'Agentic (https://github.com/transitive-bullshit/agentic)',
+    throttle = true,
+    ky = defaultKy
+  }: {
+    apiBaseUrl?: string
+    apiUserAgent?: string
+    throttle?: boolean
+    ky?: KyInstance
+  } = {}) {
+    assert(apiUserAgent, 'WikidataClient missing required "apiUserAgent"')
+    super()
+
+    this.apiUserAgent = apiUserAgent
+
+    const throttledKy = throttle ? throttleKy(ky, wikidata.throttle) : ky
+
+    this.ky = throttledKy.extend({
+      headers: {
+        'user-agent': apiUserAgent
+      }
+    })
+  }
+
+  /**
+   * Fetches one entity and returns its simplified form, keeping qualifiers
+   * and references.
+   *
+   * @param idOrOpts - an entity id, or `{ id, languages }` (`languages`
+   * defaults to `['en']`).
+   * @returns the simplified entity stored under `id` in the response.
+   */
+  async getEntityById(
+    idOrOpts: string | { id: string; languages?: string[] }
+  ): Promise<wikidata.SimplifiedEntity> {
+    const { id, languages = ['en'] } =
+      typeof idOrOpts === 'string' ? { id: idOrOpts } : idOrOpts
+
+    const url = wdk.getEntities({
+      ids: id as wikibase.EntityId,
+      languages
+    })
+
+    const res = await this.ky.get(url).json<{ entities: wikibase.Entities }>()
+    const entities = wdk.simplify.entities(res.entities, {
+      // TODO: Make this configurable and double-check defaults.
+      keepQualifiers: true,
+      keepReferences: true
+    })
+
+    const entity = entities[id]
+    return entity as wikidata.SimplifiedEntity
+  }
+
+  /**
+   * Fetches several entities in one request and returns them simplified,
+   * keeping qualifiers and references.
+   *
+   * @param idsOrOpts - an array of entity ids, or `{ ids, languages }`
+   * (`languages` defaults to `['en']`).
+   * @returns the simplified entities produced by `wdk.simplify.entities`.
+   */
+  async getEntitiesByIds(
+    idsOrOpts: string[] | { ids: string[]; languages?: string[] }
+  ): Promise<wikidata.SimplifiedEntityMap> {
+    const { ids, languages = ['en'] } = Array.isArray(idsOrOpts)
+      ? { ids: idsOrOpts }
+      : idsOrOpts
+
+    // TODO: Separate between wdk.getEntities and wdk.getManyEntities depending
+    // on how many `ids` there are.
+    const url = wdk.getEntities({
+      ids: ids as wikibase.EntityId[],
+      languages
+    })
+
+    const res = await this.ky.get(url).json<{ entities: wikibase.Entities }>()
+    const entities = wdk.simplify.entities(res.entities, {
+      keepQualifiers: true,
+      keepReferences: true
+    })
+
+    return entities as wikidata.SimplifiedEntityMap
+  }
+}
diff --git a/src/services/wikipedia-client.ts b/src/services/wikipedia-client.ts
index 5ec5717..50c70bb 100644
--- a/src/services/wikipedia-client.ts
+++ b/src/services/wikipedia-client.ts
@@ -45,7 +45,7 @@ export namespace wikipedia {
     acceptLanguage?: string
   }
 
-  export interface PageSummary {
+  export interface PageSummaryResponse {
     ns?: number
     index?: number
     type: string
@@ -182,6 +182,6 @@ export class WikipediaClient extends AIFunctionsProvider {
           'accept-language': acceptLanguage
         }
       })
-      .json<wikipedia.PageSummary>()
+      .json<wikipedia.PageSummaryResponse>()
   }
 }