kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: add WikidataClient
rodzic
84544c774c
commit
6ba3da68c3
|
@ -0,0 +1 @@
|
|||
out
|
|
@ -39,3 +39,4 @@ next-env.d.ts
|
|||
.env
|
||||
|
||||
old/
|
||||
out/
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
out
|
|
@ -3,25 +3,7 @@ import 'dotenv/config'
|
|||
|
||||
import restoreCursor from 'restore-cursor'
|
||||
|
||||
// import { SearxngClient } from '../src/services/searxng-client'
|
||||
// import { ClearbitClient } from '../src/index'
|
||||
// import { ProxycurlClient } from '../src/services/proxycurl-client'
|
||||
// import { WikipediaClient } from '../src/index'
|
||||
// import { PerigonClient } from '../src/index'
|
||||
// import { FirecrawlClient } from '../src/index'
|
||||
// import { ExaClient } from '../src/index'
|
||||
// import { DiffbotClient } from '../src/index'
|
||||
// import { WolframAlphaClient } from '../src/index'
|
||||
// import {
|
||||
// createTwitterV2Client,
|
||||
// TwitterClient
|
||||
// } from '../src/services/twitter/index'
|
||||
// import { MidjourneyClient } from '../src/index'
|
||||
// import { BingClient } from '../src/index'
|
||||
// import { TavilyClient } from '../src/index'
|
||||
// import { SocialDataClient } from '../src/index'
|
||||
// import { HunterClient } from '../src/index'
|
||||
import { JinaClient } from '../src/index'
|
||||
import { WikipediaClient } from '../src'
|
||||
|
||||
/**
|
||||
* Scratch pad for testing.
|
||||
|
@ -137,17 +119,27 @@ async function main() {
|
|||
// })
|
||||
// console.log(JSON.stringify(res, null, 2))
|
||||
|
||||
const jina = new JinaClient()
|
||||
const res = await jina.readUrl({
|
||||
url: 'https://news.ycombinator.com'
|
||||
// returnFormat: 'screenshot'
|
||||
// json: true
|
||||
})
|
||||
// const jina = new JinaClient()
|
||||
// const res = await jina.readUrl({
|
||||
// url: 'https://news.ycombinator.com'
|
||||
// // returnFormat: 'screenshot'
|
||||
// // json: true
|
||||
// })
|
||||
// const res = await jina.search({
|
||||
// query: 'trump assassination attempt',
|
||||
// // returnFormat: 'screenshot',
|
||||
// json: true
|
||||
// })
|
||||
|
||||
// const serper = new SerpAPIClient()
|
||||
// const res = await serper.search({
|
||||
// q: 'elon musk'
|
||||
// })
|
||||
const wikipedia = new WikipediaClient()
|
||||
const res = await wikipedia.getPageSummary({
|
||||
title: 'Elon_musk'
|
||||
})
|
||||
|
||||
console.log(JSON.stringify(res, null, 2))
|
||||
}
|
||||
|
||||
|
|
|
@ -131,7 +131,8 @@
|
|||
"tsx": "^4.16.2",
|
||||
"twitter-api-sdk": "^1.2.1",
|
||||
"typescript": "^5.5.4",
|
||||
"vitest": "2.0.4"
|
||||
"vitest": "2.0.4",
|
||||
"wikibase-sdk": "^10.0.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@dexaai/dexter": "^2.0.3",
|
||||
|
@ -143,7 +144,8 @@
|
|||
"llamaindex": "^0.3.16",
|
||||
"mathjs": "^13.0.0",
|
||||
"octokit": "^4.0.2",
|
||||
"twitter-api-sdk": "^1.2.1"
|
||||
"twitter-api-sdk": "^1.2.1",
|
||||
"wikibase-sdk": "^10.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@dexaai/dexter": {
|
||||
|
@ -175,6 +177,9 @@
|
|||
},
|
||||
"twitter-api-sdk": {
|
||||
"optional": true
|
||||
},
|
||||
"wikibase-sdk": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"lint-staged": {
|
||||
|
|
|
@ -141,6 +141,9 @@ importers:
|
|||
vitest:
|
||||
specifier: 2.0.4
|
||||
version: 2.0.4(@types/node@22.0.0)
|
||||
wikibase-sdk:
|
||||
specifier: ^10.0.2
|
||||
version: 10.0.2
|
||||
|
||||
examples:
|
||||
dependencies:
|
||||
|
@ -6444,6 +6447,10 @@ packages:
|
|||
resolution: {integrity: sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
wikibase-sdk@10.0.2:
|
||||
resolution: {integrity: sha512-4J1efmQU9oUC66BtqJkiqvLNtF1XVOvPKfnHMFfyyRcAHLTTeXnh+lDFIDiyI2sbNt2Q7oa4UlsxDB3ARK4CJA==}
|
||||
engines: {node: '>= 12.0.0'}
|
||||
|
||||
wikipedia@2.1.2:
|
||||
resolution: {integrity: sha512-RAYaMpXC9/E873RaSEtlEa8dXK4e0p5k98GKOd210MtkE5emm6fcnwD+N6ZA4cuffjDWagvhaQKtp/mGp2BOVQ==}
|
||||
engines: {node: '>=10'}
|
||||
|
@ -14477,6 +14484,8 @@ snapshots:
|
|||
dependencies:
|
||||
string-width: 7.2.0
|
||||
|
||||
wikibase-sdk@10.0.2: {}
|
||||
|
||||
wikipedia@2.1.2:
|
||||
dependencies:
|
||||
axios: 1.7.2
|
||||
|
|
|
@ -158,6 +158,7 @@ Depending on the AI SDK and tool you want to use, you'll also need to install th
|
|||
| [Twilio](https://www.twilio.com/docs/conversations/api) | `TwilioClient` | Twilio conversation API to send and receive SMS messages. |
|
||||
| [Twitter](https://developer.x.com/en/docs/twitter-api) | `TwitterClient` | Basic Twitter API methods for fetching users, tweets, and searching recent tweets. Includes support for plan-aware rate-limiting. Uses [Nango](https://www.nango.dev) for OAuth support. |
|
||||
| [WeatherAPI](https://www.weatherapi.com) | `WeatherClient` | Basic access to current weather data based on location. |
|
||||
| [Wikidata](https://www.wikidata.org/wiki/Wikidata:Data_access) | `WikidataClient` | Basic Wikidata client for fetching simplified entities by ID. |
|
||||
| [Wikipedia](https://www.mediawiki.org/wiki/API) | `WikipediaClient` | Wikipedia page search and summaries. |
|
||||
| [Wolfram Alpha](https://products.wolframalpha.com/llm-api/documentation) | `WolframAlphaClient` | Wolfram Alpha LLM API client for answering computational, mathematical, and scientific questions. |
|
||||
|
||||
|
|
|
@ -24,5 +24,6 @@ export * from './social-data-client'
|
|||
export * from './tavily-client'
|
||||
export * from './twilio-client'
|
||||
export * from './weather-client'
|
||||
export * from './wikidata-client'
|
||||
export * from './wikipedia-client'
|
||||
export * from './wolfram-alpha-client'
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
import type * as wikibase from 'wikibase-sdk'
|
||||
import defaultKy, { type KyInstance } from 'ky'
|
||||
import pThrottle from 'p-throttle'
|
||||
import wdk from 'wikibase-sdk/wikidata.org'
|
||||
|
||||
import { AIFunctionsProvider } from '../fns'
|
||||
import { assert, getEnv, throttleKy } from '../utils'
|
||||
|
||||
export namespace wikidata {
  /**
   * Shared rate limiter used by `WikidataClient` when throttling is enabled.
   *
   * Permits at most 200 calls per 1000ms window.
   */
  export const throttle = pThrottle({
    interval: 1000,
    limit: 200
  })

  /** String-to-string map used for both entity labels and descriptions. */
  export type Descriptions = Record<string, string>

  /** String-to-string map of an entity's sitelinks. */
  export type Sitelinks = Record<string, string>

  /**
   * A single simplified claim, including the qualifiers and references that
   * `WikidataClient` asks `wikibase-sdk` to keep when simplifying.
   */
  export interface Claim {
    value: string
    qualifiers: Record<string, string[] | number[]>
    references: Record<string, string[]>[]
  }

  /** Lists of claims indexed by string key. */
  export interface Claims {
    [key: string]: Claim[]
  }

  /**
   * Entity shape that `WikidataClient` casts the output of
   * `wdk.simplify.entities` to.
   */
  export interface SimplifiedEntity {
    id: string
    type: string
    claims: Claims
    modified: string
    labels?: Descriptions
    descriptions?: Descriptions
    // NOTE(review): left untyped in the original; presumably a per-language
    // list of alias strings — confirm against wikibase-sdk before narrowing.
    aliases?: any
    sitelinks?: Sitelinks
  }

  /** Simplified entities indexed by string key (looked up by entity id). */
  export type SimplifiedEntityMap = Record<string, SimplifiedEntity>
}
|
||||
|
||||
/**
 * Basic Wikidata client.
 *
 * Builds request URLs with `wikibase-sdk` (bound to wikidata.org), fetches them
 * with `ky`, and returns the entities simplified by `wdk.simplify.entities`.
 *
 * @see https://github.com/maxlath/wikibase-sdk
 *
 * TODO: support any wikibase instance
 */
export class WikidataClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiUserAgent: string

  /**
   * @param opts.apiBaseUrl - Accepted for forward compatibility but currently
   * unused; requests always target the URLs built by `wikibase-sdk`.
   * @param opts.apiUserAgent - Value sent as the `user-agent` header. Defaults
   * to the `WIKIDATA_API_USER_AGENT` env var, then to an Agentic identifier.
   * @param opts.throttle - When `true` (default), requests go through the
   * shared `wikidata.throttle` rate limiter.
   * @param opts.ky - `ky` instance to extend; defaults to the global `ky`.
   */
  constructor({
    apiUserAgent = getEnv('WIKIDATA_API_USER_AGENT') ??
      'Agentic (https://github.com/transitive-bullshit/agentic)',
    throttle = true,
    ky = defaultKy
  }: {
    apiBaseUrl?: string
    apiUserAgent?: string
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(apiUserAgent, 'WikidataClient missing required "apiUserAgent"')
    super()

    this.apiUserAgent = apiUserAgent

    const throttledKy = throttle ? throttleKy(ky, wikidata.throttle) : ky

    this.ky = throttledKy.extend({
      headers: {
        'user-agent': apiUserAgent
      }
    })
  }

  /**
   * Fetches a single entity by id and returns its simplified form.
   *
   * @param idOrOpts - Either the entity id, or an options object with the id
   * and the `languages` to request (defaults to `['en']`).
   * @returns The simplified entity stored under `id` in the response.
   * @throws Error if the simplified response has no entry for `id`, instead of
   * silently returning `undefined` typed as an entity.
   */
  async getEntityById(
    idOrOpts: string | { id: string; languages?: string[] }
  ): Promise<wikidata.SimplifiedEntity> {
    const { id, languages = ['en'] } =
      typeof idOrOpts === 'string' ? { id: idOrOpts } : idOrOpts

    // Reuse the multi-entity path so fetching and simplification live in one
    // place.
    const entities = await this.getEntitiesByIds({ ids: [id], languages })

    const entity = entities[id]
    if (!entity) {
      throw new Error(`WikidataClient entity not found: "${id}"`)
    }

    return entity
  }

  /**
   * Fetches several entities in one request and returns them simplified.
   *
   * @param idsOrOpts - Either an array of entity ids, or an options object
   * with `ids` (a single id or an array of ids) and the `languages` to request
   * (defaults to `['en']`).
   * @returns The simplified entities returned by `wdk.simplify.entities`.
   */
  async getEntitiesByIds(
    idsOrOpts: string[] | { ids: string | string[]; languages?: string[] }
  ): Promise<wikidata.SimplifiedEntityMap> {
    const { ids, languages = ['en'] } = Array.isArray(idsOrOpts)
      ? { ids: idsOrOpts }
      : idsOrOpts

    // The options form historically typed `ids` as a single string; normalize
    // so both a lone id and a list of ids are handled the same way.
    const idList = Array.isArray(ids) ? ids : [ids]

    // TODO: Separate between wdk.getEntities and wdk.getManyEntities depending
    // on how many `ids` there are.
    const url = wdk.getEntities({
      ids: idList as wikibase.EntityId[],
      languages
    })

    const res = await this.ky.get(url).json<any>()
    const entities = wdk.simplify.entities(res.entities, {
      // TODO: Make this configurable and double-check defaults.
      keepQualifiers: true,
      keepReferences: true
    })

    return entities as wikidata.SimplifiedEntityMap
  }
}
|
|
@ -45,7 +45,7 @@ export namespace wikipedia {
|
|||
acceptLanguage?: string
|
||||
}
|
||||
|
||||
export interface PageSummary {
|
||||
export interface PageSummaryResponse {
|
||||
ns?: number
|
||||
index?: number
|
||||
type: string
|
||||
|
@ -182,6 +182,6 @@ export class WikipediaClient extends AIFunctionsProvider {
|
|||
'accept-language': acceptLanguage
|
||||
}
|
||||
})
|
||||
.json<wikipedia.PageSummary>()
|
||||
.json<wikipedia.PageSummaryResponse>()
|
||||
}
|
||||
}
|
||||
|
|
Ładowanie…
Reference in New Issue