feat: add WikidataClient

pull/659/head
Travis Fischer 2024-08-01 03:49:50 -05:00
rodzic 84544c774c
commit 6ba3da68c3
10 zmienionych plików z 163 dodań i 29 usunięć

1
.eslintignore 100644
Wyświetl plik

@ -0,0 +1 @@
out

1
.gitignore vendored
Wyświetl plik

@ -39,3 +39,4 @@ next-env.d.ts
.env .env
old/ old/
out/

1
.prettierignore 100644
Wyświetl plik

@ -0,0 +1 @@
out

Wyświetl plik

@ -3,25 +3,7 @@ import 'dotenv/config'
import restoreCursor from 'restore-cursor' import restoreCursor from 'restore-cursor'
// import { SearxngClient } from '../src/services/searxng-client' import { WikipediaClient } from '../src'
// import { ClearbitClient } from '../src/index'
// import { ProxycurlClient } from '../src/services/proxycurl-client'
// import { WikipediaClient } from '../src/index'
// import { PerigonClient } from '../src/index'
// import { FirecrawlClient } from '../src/index'
// import { ExaClient } from '../src/index'
// import { DiffbotClient } from '../src/index'
// import { WolframAlphaClient } from '../src/index'
// import {
// createTwitterV2Client,
// TwitterClient
// } from '../src/services/twitter/index'
// import { MidjourneyClient } from '../src/index'
// import { BingClient } from '../src/index'
// import { TavilyClient } from '../src/index'
// import { SocialDataClient } from '../src/index'
// import { HunterClient } from '../src/index'
import { JinaClient } from '../src/index'
/** /**
* Scratch pad for testing. * Scratch pad for testing.
@ -137,17 +119,27 @@ async function main() {
// }) // })
// console.log(JSON.stringify(res, null, 2)) // console.log(JSON.stringify(res, null, 2))
const jina = new JinaClient() // const jina = new JinaClient()
const res = await jina.readUrl({ // const res = await jina.readUrl({
url: 'https://news.ycombinator.com' // url: 'https://news.ycombinator.com'
// returnFormat: 'screenshot' // // returnFormat: 'screenshot'
// json: true // // json: true
}) // })
// const res = await jina.search({ // const res = await jina.search({
// query: 'trump assassination attempt', // query: 'trump assassination attempt',
// // returnFormat: 'screenshot', // // returnFormat: 'screenshot',
// json: true // json: true
// }) // })
// const serper = new SerpAPIClient()
// const res = await serper.search({
// q: 'elon musk'
// })
const wikipedia = new WikipediaClient()
const res = await wikipedia.getPageSummary({
title: 'Elon_musk'
})
console.log(JSON.stringify(res, null, 2)) console.log(JSON.stringify(res, null, 2))
} }

Wyświetl plik

@ -131,7 +131,8 @@
"tsx": "^4.16.2", "tsx": "^4.16.2",
"twitter-api-sdk": "^1.2.1", "twitter-api-sdk": "^1.2.1",
"typescript": "^5.5.4", "typescript": "^5.5.4",
"vitest": "2.0.4" "vitest": "2.0.4",
"wikibase-sdk": "^10.0.2"
}, },
"peerDependencies": { "peerDependencies": {
"@dexaai/dexter": "^2.0.3", "@dexaai/dexter": "^2.0.3",
@ -143,7 +144,8 @@
"llamaindex": "^0.3.16", "llamaindex": "^0.3.16",
"mathjs": "^13.0.0", "mathjs": "^13.0.0",
"octokit": "^4.0.2", "octokit": "^4.0.2",
"twitter-api-sdk": "^1.2.1" "twitter-api-sdk": "^1.2.1",
"wikibase-sdk": "^10.0.2"
}, },
"peerDependenciesMeta": { "peerDependenciesMeta": {
"@dexaai/dexter": { "@dexaai/dexter": {
@ -175,6 +177,9 @@
}, },
"twitter-api-sdk": { "twitter-api-sdk": {
"optional": true "optional": true
},
"wikibase-sdk": {
"optional": true
} }
}, },
"lint-staged": { "lint-staged": {

Wyświetl plik

@ -141,6 +141,9 @@ importers:
vitest: vitest:
specifier: 2.0.4 specifier: 2.0.4
version: 2.0.4(@types/node@22.0.0) version: 2.0.4(@types/node@22.0.0)
wikibase-sdk:
specifier: ^10.0.2
version: 10.0.2
examples: examples:
dependencies: dependencies:
@ -6444,6 +6447,10 @@ packages:
resolution: {integrity: sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==} resolution: {integrity: sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==}
engines: {node: '>=18'} engines: {node: '>=18'}
wikibase-sdk@10.0.2:
resolution: {integrity: sha512-4J1efmQU9oUC66BtqJkiqvLNtF1XVOvPKfnHMFfyyRcAHLTTeXnh+lDFIDiyI2sbNt2Q7oa4UlsxDB3ARK4CJA==}
engines: {node: '>= 12.0.0'}
wikipedia@2.1.2: wikipedia@2.1.2:
resolution: {integrity: sha512-RAYaMpXC9/E873RaSEtlEa8dXK4e0p5k98GKOd210MtkE5emm6fcnwD+N6ZA4cuffjDWagvhaQKtp/mGp2BOVQ==} resolution: {integrity: sha512-RAYaMpXC9/E873RaSEtlEa8dXK4e0p5k98GKOd210MtkE5emm6fcnwD+N6ZA4cuffjDWagvhaQKtp/mGp2BOVQ==}
engines: {node: '>=10'} engines: {node: '>=10'}
@ -14477,6 +14484,8 @@ snapshots:
dependencies: dependencies:
string-width: 7.2.0 string-width: 7.2.0
wikibase-sdk@10.0.2: {}
wikipedia@2.1.2: wikipedia@2.1.2:
dependencies: dependencies:
axios: 1.7.2 axios: 1.7.2

Wyświetl plik

@ -158,6 +158,7 @@ Depending on the AI SDK and tool you want to use, you'll also need to install th
| [Twilio](https://www.twilio.com/docs/conversations/api) | `TwilioClient` | Twilio conversation API to send and receive SMS messages. | | [Twilio](https://www.twilio.com/docs/conversations/api) | `TwilioClient` | Twilio conversation API to send and receive SMS messages. |
| [Twitter](https://developer.x.com/en/docs/twitter-api) | `TwitterClient` | Basic Twitter API methods for fetching users, tweets, and searching recent tweets. Includes support for plan-aware rate-limiting. Uses [Nango](https://www.nango.dev) for OAuth support. | | [Twitter](https://developer.x.com/en/docs/twitter-api) | `TwitterClient` | Basic Twitter API methods for fetching users, tweets, and searching recent tweets. Includes support for plan-aware rate-limiting. Uses [Nango](https://www.nango.dev) for OAuth support. |
| [WeatherAPI](https://www.weatherapi.com) | `WeatherClient` | Basic access to current weather data based on location. | | [WeatherAPI](https://www.weatherapi.com) | `WeatherClient` | Basic access to current weather data based on location. |
| [Wikidata](https://www.wikidata.org/wiki/Wikidata:Data_access) | `WikidataClient` | Basic Wikidata client. |
| [Wikipedia](https://www.mediawiki.org/wiki/API) | `WikipediaClient` | Wikipedia page search and summaries. | | [Wikipedia](https://www.mediawiki.org/wiki/API) | `WikipediaClient` | Wikipedia page search and summaries. |
| [Wolfram Alpha](https://products.wolframalpha.com/llm-api/documentation) | `WolframAlphaClient` | Wolfram Alpha LLM API client for answering computational, mathematical, and scientific questions. | | [Wolfram Alpha](https://products.wolframalpha.com/llm-api/documentation) | `WolframAlphaClient` | Wolfram Alpha LLM API client for answering computational, mathematical, and scientific questions. |

Wyświetl plik

@ -24,5 +24,6 @@ export * from './social-data-client'
export * from './tavily-client' export * from './tavily-client'
export * from './twilio-client' export * from './twilio-client'
export * from './weather-client' export * from './weather-client'
export * from './wikidata-client'
export * from './wikipedia-client' export * from './wikipedia-client'
export * from './wolfram-alpha-client' export * from './wolfram-alpha-client'

Wyświetl plik

@ -0,0 +1,123 @@
import type * as wikibase from 'wikibase-sdk'
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import wdk from 'wikibase-sdk/wikidata.org'
import { AIFunctionsProvider } from '../fns'
import { assert, getEnv, throttleKy } from '../utils'
export namespace wikidata {
  /**
   * Shared default throttle: at most 200 calls per 1000ms interval.
   */
  export const throttle = pThrottle({
    limit: 200,
    interval: 1000
  })

  /** String-to-string map used to type both `labels` and `descriptions`. */
  export type Descriptions = Record<string, string>

  /** String-to-string map used to type `sitelinks`. */
  export type Sitelinks = Record<string, string>

  /**
   * A single simplified claim, including its qualifiers and references.
   */
  export interface Claim {
    value: string
    qualifiers: Record<string, string[] | number[]>
    references: Record<string, string[]>[]
  }

  /** Claims grouped under string keys (presumably property ids — confirm). */
  export interface Claims {
    [key: string]: Claim[]
  }

  /**
   * Simplified entity shape that `WikidataClient` returns from its lookups.
   */
  export interface SimplifiedEntity {
    id: string
    type: string
    claims: Claims
    modified: string
    labels?: Descriptions
    descriptions?: Descriptions
    // NOTE(review): left as `any` to avoid narrowing the public type; the
    // exact simplified alias shape should be confirmed against wikibase-sdk.
    aliases?: any
    sitelinks?: Sitelinks
  }

  /** Simplified entities indexed by a string key (the entity id). */
  export type SimplifiedEntityMap = {
    [entityId: string]: SimplifiedEntity
  }
}
/**
 * Basic Wikidata client.
 *
 * Request URLs are built with `wikibase-sdk` (pre-configured for
 * wikidata.org), fetched with `ky`, and the raw responses are simplified with
 * `wdk.simplify.entities` (qualifiers and references are kept).
 *
 * @see https://github.com/maxlath/wikibase-sdk
 *
 * TODO: support any wikibase instance
 */
export class WikidataClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiUserAgent: string

  /**
   * @param opts.apiBaseUrl - Accepted for forward compatibility but currently
   * unused; the client always targets wikidata.org (see the TODO above).
   * @param opts.apiUserAgent - Value sent as the `user-agent` header. Defaults
   * to the `WIKIDATA_API_USER_AGENT` env var, then to a generic Agentic UA.
   * @param opts.throttle - When `true` (default), requests go through the
   * shared `wikidata.throttle` limiter.
   * @param opts.ky - Custom `ky` instance to extend.
   */
  constructor({
    apiUserAgent = getEnv('WIKIDATA_API_USER_AGENT') ??
      'Agentic (https://github.com/transitive-bullshit/agentic)',
    throttle = true,
    ky = defaultKy
  }: {
    apiBaseUrl?: string
    apiUserAgent?: string
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(apiUserAgent, 'WikidataClient missing required "apiUserAgent"')
    super()

    this.apiUserAgent = apiUserAgent

    const throttledKy = throttle ? throttleKy(ky, wikidata.throttle) : ky

    this.ky = throttledKy.extend({
      headers: {
        'user-agent': apiUserAgent
      }
    })
  }

  /**
   * Fetches a single entity and returns its simplified form.
   *
   * @param idOrOpts - Either the entity id, or `{ id, languages }` where
   * `languages` defaults to `['en']`.
   * @returns The simplified entity stored under `id` in the response.
   * @throws If the response contains no entity under `id`, instead of
   * returning `undefined` typed as an entity.
   */
  async getEntityById(
    idOrOpts: string | { id: string; languages?: string[] }
  ): Promise<wikidata.SimplifiedEntity> {
    const { id, languages = ['en'] } =
      typeof idOrOpts === 'string' ? { id: idOrOpts } : idOrOpts

    const entities = await this.fetchEntities([id], languages)
    const entity = entities[id]
    assert(entity, `WikidataClient entity not found: "${id}"`)

    return entity
  }

  /**
   * Fetches several entities and returns them simplified, keyed by id.
   *
   * @param idsOrOpts - Either an array of entity ids, or `{ ids, languages }`
   * where `ids` may be a single id or an array and `languages` defaults to
   * `['en']`.
   * @returns A map of simplified entities; empty when no ids are given.
   */
  async getEntitiesByIds(
    idsOrOpts: string[] | { ids: string | string[]; languages?: string[] }
  ): Promise<wikidata.SimplifiedEntityMap> {
    const { ids, languages = ['en'] } = Array.isArray(idsOrOpts)
      ? { ids: idsOrOpts }
      : idsOrOpts

    // The options form historically typed `ids` as a single string, so accept
    // both a lone id and a list of ids.
    const idList = Array.isArray(ids) ? ids : [ids]

    return this.fetchEntities(idList, languages)
  }

  /**
   * Shared implementation behind the public lookups: builds the request
   * URL(s), fetches them, simplifies each response and merges the results.
   */
  protected async fetchEntities(
    ids: string[],
    languages: string[]
  ): Promise<wikidata.SimplifiedEntityMap> {
    const merged: wikidata.SimplifiedEntityMap = {}

    // Nothing to look up, so skip the network round trip entirely.
    if (ids.length === 0) {
      return merged
    }

    // `getManyEntities` splits the ids into as many URLs as needed, so large
    // id lists are not cut off by the per-request id limit.
    const urls = wdk.getManyEntities({
      ids: ids as wikibase.EntityId[],
      languages
    })

    const pages = await Promise.all(
      urls.map(async (url) => {
        const res = await this.ky.get(url).json<any>()

        // Error payloads carry no `entities`; fail with a readable message
        // rather than an opaque error from inside the simplifier.
        assert(
          res?.entities,
          `WikidataClient API error: ${
            res?.error?.info ?? 'response is missing "entities"'
          }`
        )

        return wdk.simplify.entities(res.entities, {
          // TODO: Make this configurable and double-check defaults.
          keepQualifiers: true,
          keepReferences: true
        }) as wikidata.SimplifiedEntityMap
      })
    )

    for (const page of pages) {
      Object.assign(merged, page)
    }

    return merged
  }
}

Wyświetl plik

@ -45,7 +45,7 @@ export namespace wikipedia {
acceptLanguage?: string acceptLanguage?: string
} }
export interface PageSummary { export interface PageSummaryResponse {
ns?: number ns?: number
index?: number index?: number
type: string type: string
@ -182,6 +182,6 @@ export class WikipediaClient extends AIFunctionsProvider {
'accept-language': acceptLanguage 'accept-language': acceptLanguage
} }
}) })
.json<wikipedia.PageSummary>() .json<wikipedia.PageSummaryResponse>()
} }
} }