pull/643/head^2
Travis Fischer 2024-05-26 18:11:56 -05:00
rodzic ca31b560a8
commit 8eda307096
7 zmienionych plików z 174 dodań i 13 usunięć

Wyświetl plik

@ -5,7 +5,8 @@ import { gracefulExit } from 'exit-hook'
import restoreCursor from 'restore-cursor'
// import { ClearbitClient } from '../src/index.js'
import { ProxycurlClient } from '../src/services/proxycurl-client.js'
// import { ProxycurlClient } from '../src/services/proxycurl-client.js'
import { WikipediaClient } from '../src/services/wikipedia-client.js'
/**
* Scratch pad for testing.
@ -19,10 +20,16 @@ async function main() {
// })
// console.log(JSON.stringify(res, null, 2))
const proxycurl = new ProxycurlClient()
const res = await proxycurl.getLinkedInPerson({
linkedin_profile_url: 'https://linkedin.com/in/fisch2'
// personal_email: 'fisch0920@gmail.com'
// const proxycurl = new ProxycurlClient()
// const res = await proxycurl.getLinkedInPerson({
// linkedin_profile_url: 'https://linkedin.com/in/fisch2'
// // personal_email: 'fisch0920@gmail.com'
// })
// console.log(JSON.stringify(res, null, 2))
const wikipedia = new WikipediaClient()
const res = await wikipedia.getPageSummary({
title: 'Naruto_(TV_series)'
})
console.log(JSON.stringify(res, null, 2))

Wyświetl plik

@ -33,6 +33,7 @@
- serper
- twitter
- weatherapi
- wikipedia
## TODO
@ -45,7 +46,6 @@
- walter
- services
- wolfram alpha
- wikipedia
- midjourney
- unstructured
- pull from [langchain](https://github.com/langchain-ai/langchainjs/tree/main/langchain)

Wyświetl plik

@ -160,7 +160,7 @@ export class ExaClient {
* Performs an Exa search for the given query.
*/
async search(opts: { query: string } & exa.RegularSearchOptions) {
return this.ky.post('search', { json: opts }).json<exa.SearchResponse>()
return this.ky.get('search', { json: opts }).json<exa.SearchResponse>()
}
/**
@ -202,8 +202,6 @@ export class ExaClient {
/**
* Finds similar links to the provided URL and returns the contents of the
* documents.
*
* @param {string} url - The URL for which to find similar links.
*/
async findSimilarAndContents<
T extends exa.ContentsOptions = exa.ContentsOptions
@ -232,8 +230,6 @@ export class ExaClient {
/**
* Retrieves contents of documents based on a list of document IDs.
*
* @param {string | string[] | SearchResult[]} ids - An array of document IDs.
*/
async getContents<T extends exa.ContentsOptions = exa.ContentsOptions>({
ids,

Wyświetl plik

@ -13,3 +13,4 @@ export * from './serpapi-client.js'
export * from './serper-client.js'
export * from './twitter-client.js'
export * from './weather-client.js'
export * from './wikipedia-client.js'

Wyświetl plik

@ -0,0 +1,158 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { assert, getEnv, throttleKy } from '../utils.js'
/**
 * Types and rate-limit configuration for the Wikipedia REST APIs.
 *
 * Search uses the MediaWiki REST API (https://www.mediawiki.org/wiki/API:REST_API);
 * page summaries use the Wikimedia REST API (https://en.wikipedia.org/api/rest_v1/).
 */
export namespace wikipedia {
  // Only allow 200 requests per second
  export const throttle = pThrottle({
    limit: 200,
    interval: 1000
  })

  /** Options for `WikipediaClient.search`. */
  export interface SearchOptions {
    /** Free-text search query. */
    query: string
    /** Maximum number of pages to return (forwarded as a search param). */
    limit?: number
  }

  /** Response shape of the MediaWiki page-search endpoint. */
  export interface PageSearchResponse {
    pages: Page[]
  }

  /** A single page hit from the search endpoint. */
  export interface Page {
    id: number
    key: string
    title: string
    // NOTE(review): typed as literal `null` — presumably `string | null` in
    // real API responses; confirm against live search results.
    matched_title: null
    excerpt: string
    description: null | string
    thumbnail: Thumbnail | null
  }

  /** Thumbnail metadata attached to a search result page. */
  export interface Thumbnail {
    url: string
    width: number
    height: number
    mimetype: string
    // NOTE(review): typed as literal `null` — likely `number | null` for
    // video/audio media; confirm against API responses.
    duration: null
  }

  /** Options for `WikipediaClient.getPageSummary`. */
  export interface PageSummaryOptions {
    /** Page title, e.g. 'Naruto_(TV_series)'. */
    title: string
    /** Whether the API should follow redirects (forwarded as a search param). */
    redirect?: boolean
    /** Value for the `accept-language` request header (e.g. 'en-us'). */
    acceptLanguage?: string
  }

  /** Response shape of the Wikimedia page-summary endpoint. */
  export interface PageSummary {
    ns?: number
    index?: number
    type: string
    title: string
    displaytitle: string
    namespace: { id: number; text: string }
    wikibase_item: string
    titles: { canonical: string; normalized: string; display: string }
    pageid: number
    thumbnail: {
      source: string
      width: number
      height: number
    }
    originalimage: {
      source: string
      width: number
      height: number
    }
    lang: string
    dir: string
    revision: string
    tid: string
    timestamp: string
    description: string
    description_source: string
    content_urls: {
      desktop: {
        page: string
        revisions: string
        edit: string
        talk: string
      }
      mobile: {
        page: string
        revisions: string
        edit: string
        talk: string
      }
    }
    /** Plain-text extract of the page. */
    extract: string
    /** HTML extract of the page. */
    extract_html: string
    normalizedtitle?: string
    coordinates?: {
      lat: number
      lon: number
    }
  }
}
/**
 * Basic Wikipedia client for searching pages and fetching page summaries.
 *
 * Search goes through the MediaWiki REST API
 * (https://www.mediawiki.org/wiki/API:REST_API); page summaries go through
 * the Wikimedia REST API configured via `apiBaseUrl`
 * (default https://en.wikipedia.org/api/rest_v1).
 */
export class WikipediaClient {
  readonly apiBaseUrl: string
  readonly apiUserAgent: string
  readonly ky: KyInstance

  /**
   * @param apiBaseUrl - Base URL of the Wikimedia REST API (env
   *   `WIKIPEDIA_API_BASE_URL` or the en.wikipedia.org default).
   * @param apiUserAgent - Identifying user-agent header value (env
   *   `WIKIPEDIA_API_USER_AGENT` or the Agentic default); Wikimedia asks
   *   API consumers to identify themselves.
   * @param throttle - Whether to rate-limit requests (default true).
   * @param ky - Optional custom `ky` instance to extend.
   */
  constructor({
    apiBaseUrl = getEnv('WIKIPEDIA_API_BASE_URL') ??
      'https://en.wikipedia.org/api/rest_v1',
    apiUserAgent = getEnv('WIKIPEDIA_API_USER_AGENT') ??
      'Agentic (https://github.com/transitive-bullshit/agentic)',
    throttle = true,
    ky = defaultKy
  }: {
    apiBaseUrl?: string
    apiUserAgent?: string
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(apiBaseUrl, 'WikipediaClient missing required "apiBaseUrl"')
    assert(apiUserAgent, 'WikipediaClient missing required "apiUserAgent"')

    this.apiBaseUrl = apiBaseUrl
    this.apiUserAgent = apiUserAgent

    const throttledKy = throttle ? throttleKy(ky, wikipedia.throttle) : ky

    this.ky = throttledKy.extend({
      headers: {
        'api-user-agent': apiUserAgent
      }
    })
  }

  /**
   * Searches Wikipedia for pages matching the given query.
   *
   * NOTE: this endpoint lives on the MediaWiki REST API, not `apiBaseUrl`,
   * so the full URL is passed explicitly (bypassing `prefixUrl`).
   */
  async search({ query, ...opts }: wikipedia.SearchOptions) {
    return (
      // https://www.mediawiki.org/wiki/API:REST_API
      this.ky
        .get('https://en.wikipedia.org/w/rest.php/v1/search/page', {
          searchParams: { q: query, ...opts }
        })
        .json<wikipedia.PageSearchResponse>()
    )
  }

  /**
   * Fetches the summary of a Wikipedia page by title.
   *
   * @returns The page summary, including plain-text and HTML extracts.
   */
  async getPageSummary({
    title,
    acceptLanguage = 'en-us',
    redirect = true,
    ...opts
  }: wikipedia.PageSummaryOptions) {
    // https://en.wikipedia.org/api/rest_v1/
    // Titles may contain URL-significant characters ("/", "?", "%", "&"),
    // which would corrupt the request path if interpolated raw, so
    // percent-encode the title. (encodeURIComponent leaves "_", "(", ")"
    // untouched, so canonical titles like 'Naruto_(TV_series)' are unchanged.)
    return this.ky
      .get(`page/summary/${encodeURIComponent(title)}`, {
        prefixUrl: this.apiBaseUrl,
        searchParams: { redirect, ...opts },
        headers: {
          'accept-language': acceptLanguage
        }
      })
      .json<wikipedia.PageSummary>()
  }
}

Wyświetl plik

@ -16,7 +16,6 @@
// "emitDecoratorMetadata": true,
"strict": true,
"strictNullChecks": true,
"noUncheckedIndexedAccess": true,
"forceConsistentCasingInFileNames": true,

Wyświetl plik

@ -4,7 +4,7 @@ export default defineConfig([
{
entry: ['src/index.ts'],
outDir: 'dist',
target: 'node18',
target: 'node22',
platform: 'node',
format: ['esm'],
splitting: false,