pull/643/head^2
Travis Fischer 2024-06-03 00:03:01 -05:00
rodzic b03a38c6e8
commit b195c6b8f5
3 zmienionych plików z 671 dodań i 139 usunięć

Wyświetl plik

@ -1,13 +1,13 @@
#!/usr/bin/env node
import 'dotenv/config'
import { gracefulExit } from 'exit-hook'
import restoreCursor from 'restore-cursor'
// import { SearxngClient } from '../src/services/searxng-client.js'
// import { ClearbitClient } from '../src/index.js'
// import { ProxycurlClient } from '../src/services/proxycurl-client.js'
import { WikipediaClient } from '../src/index.js'
// import { WikipediaClient } from '../src/index.js'
import { PerigonClient } from '../src/index.js'
/**
* Scratch pad for testing.
@ -27,12 +27,12 @@ async function main() {
// })
// console.log(JSON.stringify(res, null, 2))
const wikipedia = new WikipediaClient()
const res = await wikipedia.getPageSummary({
// title: 'Naruto_(TV_series)'
title: 'SpaceX'
})
console.log(JSON.stringify(res, null, 2))
// const wikipedia = new WikipediaClient()
// const res = await wikipedia.getPageSummary({
// // title: 'Naruto_(TV_series)'
// title: 'SpaceX'
// })
// console.log(JSON.stringify(res, null, 2))
// const searxng = new SearxngClient()
// const res = await searxng.search({
@ -41,12 +41,18 @@ async function main() {
// })
// console.log(JSON.stringify(res, null, 2))
return gracefulExit(0)
const perigon = new PerigonClient()
const res = await perigon.searchArticles({
q: 'AI agents AND startup',
sourceGroup: 'top50tech'
})
console.log(JSON.stringify(res, null, 2))
}
try {
await main()
} catch (err) {
console.error('unexpected error', err)
gracefulExit(1)
process.exit(1)
}

Wyświetl plik

@ -81,7 +81,6 @@
"del-cli": "^5.1.0",
"dotenv": "^16.4.5",
"eslint": "^8.57.0",
"exit-hook": "^4.0.0",
"husky": "^9.0.11",
"lint-staged": "^15.2.5",
"np": "^10.0.5",

Wyświetl plik

@ -1,105 +1,401 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import { assert, getEnv, throttleKy } from '../utils.js'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, pruneUndefined, throttleKy } from '../utils.js'
export namespace perigon {
// Allow up to 20 requests per minute by default.
// Allow up to 2 requests per second by default.
export const throttle = pThrottle({
limit: 20,
interval: 60 * 1000,
limit: 2,
interval: 1000,
strict: true
})
export type ArticleLabel =
| 'Opinion'
| 'Non-news'
| 'Paid News'
| 'Fact Check'
| 'Pop Culture'
| 'Roundup'
| 'Press Release'
export const DEFAULT_PAGE_SIZE = 10
export const MAX_PAGE_SIZE = 100
export type Categories =
| 'Politics'
| 'Tech'
| 'Sports'
| 'Business'
| 'Finance'
| 'Entertainment'
| 'Health'
| 'Weather'
| 'Lifestyle'
| 'Auto'
| 'Science'
| 'Travel'
| 'Environment'
| 'World'
| 'General'
| 'none'
/**
 * Zod schema enumerating the article label values.
 *
 * Used by the `label` filter of the article search options, and (together
 * with the extra literal 'Low Content') by the `excludeLabel` filter.
 */
export const ArticleLabelSchema = z.union([
z.literal('Opinion'),
z.literal('Non-news'),
z.literal('Paid News'),
z.literal('Fact Check'),
z.literal('Pop Culture'),
z.literal('Roundup'),
z.literal('Press Release')
])
export type ArticleLabel = z.infer<typeof ArticleLabelSchema>
export type ArticlesOptions = {
q: string
title?: string
desc?: string
content?: string
url?: string
from?: string | Date
to?: string | Date
addDateFrom?: string | Date
addDateTo?: string | Date
refreshDateFrom?: string | Date
refreshDateTo?: string | Date
articleId?: string
clusterId?: string
medium?: 'article' | 'video'
source?: string
sourceGroup?:
| 'top10'
| 'top100'
| 'top500English'
| 'top25crypto'
| 'top25finance'
| 'top50tech'
| 'top100sports'
| 'top100leftUS'
| 'top100rightUS'
| 'top100centerUS'
excludeSource?: string
paywall?: boolean
country?: string
language?: string
label?: ArticleLabel
excludeLabel?: ArticleLabel | 'Low Content'
byline?: string
topic?: string
category?: Categories
journalistId?: string
state?: string
city?: string
area?: string
location?: string
sortBy?: 'date' | 'relevance' | 'addDate' | 'pubDate' | 'refreshDate'
relevance?: number
size?: number
showReprints?: boolean
showNumResults?: boolean
type?: 'all' | 'local' | 'world'
linkTo?: string
reprintGroupId?: string
personWikidataId?: string[]
personName?: string[]
companyId?: string[]
companyName?: string
companyDomain?: string[]
companySymbol?: string[]
maxDistance?: number
lat?: number
lon?: number
searchTranslation?: boolean
}
/**
 * Zod schema enumerating the article category values.
 *
 * Used by the `category` search filter, whose description states that the
 * literal 'none' selects uncategorized articles.
 */
export const CategoriesSchema = z.union([
z.literal('Politics'),
z.literal('Tech'),
z.literal('Sports'),
z.literal('Business'),
z.literal('Finance'),
z.literal('Entertainment'),
z.literal('Health'),
z.literal('Weather'),
z.literal('Lifestyle'),
z.literal('Auto'),
z.literal('Science'),
z.literal('Travel'),
z.literal('Environment'),
z.literal('World'),
z.literal('General'),
z.literal('none')
])
export type Categories = z.infer<typeof CategoriesSchema>
export type ArticlesResponse = {
/**
 * Zod schema enumerating the named source groups accepted by the
 * `sourceGroup` search filter.
 *
 * Each literal carries its own `.describe()` text explaining which set of
 * sources the group covers.
 */
export const SourceGroupSchema = z.union([
z.literal('top10').describe('Top 10 most popular sources globally'),
z.literal('top100').describe('Top 100 most popular sources globally'),
z
.literal('top500English')
.describe('Top 500 most popular (English) sources globally'),
z
.literal('top25crypto')
.describe(
'Top 25 most popular sources covering cryptocurrency & blockchain developments'
),
z
.literal('top25finance')
.describe(
'Top 25 most popular sources covering financial news, movement in the markets & public equities'
),
z
.literal('top50tech')
.describe('Top 50 sources covering new technology & businesses in tech'),
z
.literal('top100sports')
.describe(
'Top 100 most popular (English) sources covering sports of all types'
),
z
.literal('top100leftUS')
.describe(
'Top 100 most popular (US) sources with an average political bias rating of Left or Leans Left'
),
z
.literal('top100rightUS')
.describe(
'Top 100 most popular (US) sources with an average political bias rating of Right or Leans Right'
),
z
.literal('top100centerUS')
.describe(
'Top 100 most popular (US) sources with an average political bias rating of Center or Middle'
)
])
export type SourceGroup = z.infer<typeof SourceGroupSchema>
/**
 * Zod schema enumerating the sort keys for article search results.
 *
 * Used by the `sortBy` field of the article search options, which defaults
 * it to 'relevance'.
 */
export const SortBySchema = z.union([
z.literal('date'),
z.literal('relevance'),
z.literal('addDate'),
z.literal('pubDate'),
z.literal('refreshDate')
])
export type SortBy = z.infer<typeof SortBySchema>
/**
 * Zod schema describing every option accepted by the article search
 * endpoint.
 *
 * `q` is the only required field; all other filters are optional. Fields
 * declared as arrays of strings (`personWikidataId`, `personName`,
 * `companyId`, `companyDomain`, `companySymbol`) are joined with commas by
 * the client before being sent as query parameters.
 *
 * The `.describe()` texts double as parameter documentation when (a subset
 * of) this schema is used as an AI function input schema, so they must
 * mention only values the schema actually accepts.
 */
export const ArticlesSearchOptionsSchema = z.object({
  q: z.string()
    .describe(`Search query. It may use boolean operators (AND, OR, NOT) and quotes for exact matching. Example search queries:
- AI agents
- Compare the latest predictions and popular opinions on the 2024 US election
- "elon musk" AND tesla
- (upcoming release OR launch) AND apple
- (Google OR Amazon) AND NOT ("Jeff Bezos" OR Android)
- "climate change"
- Crypto* OR Bitcoin NOT Ethereum
`),
  title: z
    .string()
    .optional()
    .describe(
      'Search query which applies only to article titles / headlines.'
    ),
  desc: z.string().optional(),
  content: z.string().optional(),
  url: z.string().optional(),
  from: z
    .string()
    .optional()
    .describe(
      'Filter to only return articles published after the specified date (ISO or "yyyy-mm-dd" format)'
    ),
  to: z
    .string()
    .optional()
    .describe(
      'Filter to only return articles published before the specified date (ISO or "yyyy-mm-dd" format)'
    ),
  addDateFrom: z.string().optional(),
  addDateTo: z.string().optional(),
  refreshDateFrom: z.string().optional(),
  refreshDateTo: z.string().optional(),
  articleId: z.string().optional(),
  clusterId: z.string().optional(),
  medium: z.union([z.literal('article'), z.literal('video')]).optional(),
  source: z
    .string()
    .optional()
    .describe("Filter articles from a specific publisher's source domain."),
  sourceGroup: SourceGroupSchema.optional().describe(
    'The source group to retrieve articles from.'
  ),
  excludeSource: z
    .string()
    .optional()
    .describe(
      'Source website domains which should be excluded from the search. Wildcards (* and ?) are supported (e.g. "*.cnn.com").'
    ),
  paywall: z
    .boolean()
    .optional()
    .describe(
      'Filter to show only results where the source has a paywall (true) or does not have a paywall (false).'
    ),
  country: z
    .string()
    .optional()
    .describe('Country code to filter by country.'),
  language: z.string().optional(),
  // The example values must match the literals in ArticleLabelSchema exactly
  // ("Paid News", not "Paid-news"), otherwise a caller following the
  // description would produce a value the schema rejects.
  label: ArticleLabelSchema.optional().describe(
    'Labels to filter by, could be "Opinion", "Paid News", "Non-news", etc. If multiple parameters are passed, they will be applied as OR operations.'
  ),
  excludeLabel: z
    .union([ArticleLabelSchema, z.literal('Low Content')])
    .optional()
    .describe(
      'Exclude results that include specific labels ("Opinion", "Non-news", "Paid News", etc.). You can filter multiple by repeating the parameter.'
    ),
  byline: z.string().optional(),
  topic: z.string().optional(),
  category: CategoriesSchema.optional().describe(
    'Filter by categories. Categories are general themes that the article is about. Examples of categories: Tech, Politics, etc. If multiple parameters are passed, they will be applied as OR operations. Use "none" to search uncategorized articles.'
  ),
  journalistId: z.string().optional(),
  state: z
    .string()
    .optional()
    .describe(
      'Filters articles where a specified state plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the state in question.'
    ),
  city: z
    .string()
    .optional()
    .describe(
      'Filters articles where a specified city plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the urban area in question.'
    ),
  area: z
    .string()
    .optional()
    .describe(
      'Filters articles where a specified area, such as a neighborhood, borough, or district, plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the area in question.'
    ),
  location: z.string().optional(),
  sortBy: SortBySchema.default('relevance')
    .optional()
    .describe('How to sort the article results.'),
  showReprints: z
    .boolean()
    .optional()
    .describe(
      'Whether to return reprints in the response or not. Reprints are usually wired articles from sources like AP or Reuters that are reprinted in multiple sources at the same time. By default, this parameter is "true".'
    ),
  showNumResults: z.boolean().optional(),
  type: z
    .union([z.literal('all'), z.literal('local'), z.literal('world')])
    .optional(),
  linkTo: z.string().optional(),
  reprintGroupId: z.string().optional(),
  personWikidataId: z.array(z.string()).optional(),
  personName: z
    .array(z.string())
    .optional()
    .describe('List of person names for exact matches.'),
  companyId: z.array(z.string()).optional(),
  companyName: z.string().optional().describe('Search by company name.'),
  companyDomain: z
    .array(z.string())
    .optional()
    .describe('Search by company domain.'),
  companySymbol: z
    .array(z.string())
    .optional()
    .describe('Search by company stock ticker symbol.'),
  maxDistance: z.number().optional(),
  lat: z.number().optional(),
  lon: z.number().optional(),
  searchTranslation: z
    .boolean()
    .optional()
    .describe(
      'Expand a query to search the translation, translatedTitle, and translatedDescription fields for non-English articles.'
    ),
  // Pages are zero-based, so 0 (the first page and the declared default)
  // must validate: `.nonnegative()` accepts 0 whereas `.positive()` would
  // reject it.
  page: z
    .number()
    .int()
    .nonnegative()
    .max(10_000)
    .default(0)
    .optional()
    .describe('Page number of results to return (zero-based)'),
  // NOTE(review): the schema caps `size` at DEFAULT_PAGE_SIZE while the
  // client clamps requests to MAX_PAGE_SIZE; left as-is, confirm which
  // limit is intended.
  size: z
    .number()
    .int()
    .positive()
    .max(DEFAULT_PAGE_SIZE)
    .optional()
    .describe('Number of results to return per page')
})
export type ArticlesSearchOptions = z.infer<
typeof ArticlesSearchOptionsSchema
>
/**
 * Zod schema for the story search options.
 *
 * Built by picking the filters that stories share with
 * `ArticlesSearchOptionsSchema` and then extending the result with
 * story-specific fields.
 */
export const StoriesSearchOptionsSchema = ArticlesSearchOptionsSchema.pick({
q: true,
clusterId: true,
topic: true,
category: true,
from: true,
to: true,
state: true,
city: true,
area: true,
showNumResults: true,
page: true,
size: true,
sourceGroup: true,
personWikidataId: true,
personName: true,
companyId: true,
companyName: true,
companyDomain: true,
companySymbol: true
}).extend({
name: z.string().optional().describe('Search stories by name.'),
nameExists: z.boolean().optional(),
initializedFrom: z.string().optional(),
initializedTo: z.string().optional(),
updatedFrom: z.string().optional(),
updatedTo: z.string().optional(),
minClusterSize: z.number().optional(),
maxClusterSize: z.number().optional(),
showDuplicates: z
.boolean()
.optional()
.describe(
'Stories are deduplicated by default. If a story is deduplicated, all future articles are merged into the original story. `duplicateOf` field contains the original cluster id. When showDuplicates=true, all stories are shown.'
),
// `sortBy` is not picked from the article schema; stories declare their own
// set of sort keys here.
sortBy: z
.union([
z.literal('count'),
z.literal('createdAt'),
z.literal('updatedAt')
])
.optional()
.describe('How to sort the results.')
})
export type StoriesSearchOptions = z.infer<typeof StoriesSearchOptionsSchema>
/**
 * Zod schema for the people search options.
 *
 * `name` is the only required field; the remaining filters and `size` are
 * optional.
 */
export const PeopleSearchOptionsSchema = z.object({
name: z
.string()
.describe(
'Person name query to search for. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
wikidataId: z
.array(z.string())
.optional()
.describe('Search by ID of Wikidata entity.'),
occupationId: z
.array(z.string())
.optional()
.describe('Search by Wikidata occupation ID.'),
occupationLabel: z
.string()
.optional()
.describe('Search by occupation name.'),
// Validation caps `size` at DEFAULT_PAGE_SIZE.
size: z
.number()
.int()
.positive()
.max(DEFAULT_PAGE_SIZE)
.optional()
.describe('Number of results to return per page')
})
export type PeopleSearchOptions = z.infer<typeof PeopleSearchOptionsSchema>
/**
 * Zod schema for the company search options.
 *
 * Every field is optional. `id`, `symbol` and `domain` are arrays of
 * strings; the free-text fields (`q`, `name`, `industry`, `sector`) are
 * described as supporting boolean operators and quoted exact matches.
 */
export const CompanySearchOptionsSchema = z.object({
q: z
.string()
.optional()
.describe(
'Company search query. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
name: z
.string()
.optional()
.describe(
'Search by company name. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
industry: z
.string()
.optional()
.describe(
'Search by company industry. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
sector: z
.string()
.optional()
.describe(
'Search by company sector. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
id: z.array(z.string()).optional().describe('Search by company ID.'),
symbol: z
.array(z.string())
.optional()
.describe('Search by company stock ticker symbol.'),
domain: z
.array(z.string())
.optional()
.describe('Search by company domain.'),
country: z.string().optional().describe('Search by country.'),
exchange: z.string().optional().describe('Search by exchange name.'),
// Employee-count bounds must be positive integers when provided.
numEmployeesFrom: z
.number()
.int()
.positive()
.optional()
.describe('Minimum number of employees.'),
numEmployeesTo: z
.number()
.int()
.positive()
.optional()
.describe('Maximum number of employees.'),
ipoFrom: z
.string()
.optional()
.describe('Starting IPO date (ISO or "yyyy-mm-dd" format)'),
ipoTo: z
.string()
.optional()
.describe('Ending IPO date (ISO or "yyyy-mm-dd" format)'),
// Validation caps `size` at DEFAULT_PAGE_SIZE.
size: z
.number()
.int()
.positive()
.max(DEFAULT_PAGE_SIZE)
.optional()
.describe('Number of results to return per page')
})
export type CompanySearchOptions = z.infer<typeof CompanySearchOptionsSchema>
export type ArticlesSearchResponse = {
status: number
numResults: number
articles: Article[]
@ -157,31 +453,7 @@ export namespace perigon {
places: null
}
export type StoriesOptions = {
clusterId?: string
topic?: string
category?: Categories
q?: string
name?: string
nameExists?: boolean
from?: string
to?: string
initializedFrom?: string
initializedTo?: string
updatedFrom?: string
updatedTo?: string
minClusterSize?: number
maxClusterSize?: number
state?: string
city?: string
area?: string
page?: number
size?: number
sortBy?: 'count' | 'createdAt' | 'updatedAt'
showNumResults?: boolean
}
export type StoriesResponse = {
export type StoriesSearchResponse = {
status: number
numResults: number
results: Story[]
@ -251,15 +523,109 @@ export namespace perigon {
county?: string
}>
}
/**
 * Response body type used for the people search request
 * (`PerigonClient.searchPeople`).
 */
export interface PeopleSearchResponse {
status: number
numResults: number
results: Person[]
}
/**
 * A single person entry in `PeopleSearchResponse.results`.
 */
export interface Person {
wikidataId: string
name: string
gender: Gender
dateOfBirth: DateOfBirth
// NOTE(review): typed as `any`; the actual shape is not visible here.
dateOfDeath: any
description: string
aliases: string[]
occupation: Occupation[]
position: Position[]
politicalParty: PoliticalParty[]
// Optional: may be absent from a person record.
image?: Image
abstract: string
}
/** Gender of a `Person`, given as a Wikidata ID plus a label. */
export interface Gender {
wikidataId: string
label: string
}
/**
 * Date of birth of a `Person`.
 *
 * NOTE(review): both fields are plain strings; their exact formats are not
 * visible here.
 */
export interface DateOfBirth {
time: string
precision: string
}
/** An occupation of a `Person`, given as a Wikidata ID plus a label. */
export interface Occupation {
wikidataId: string
label: string
}
/**
 * A position held by a `Person`.
 *
 * NOTE(review): `startTime`, `endTime` and `employer` are typed as `any`;
 * their actual shapes are not visible here.
 */
export interface Position {
wikidataId: string
label: string
startTime: any
endTime: any
employer: any
}
/**
 * A political party entry of a `Person`.
 *
 * NOTE(review): `startTime` and `endTime` are typed as `any`; their actual
 * shapes are not visible here.
 */
export interface PoliticalParty {
wikidataId: string
label: string
startTime: any
endTime: any
}
/** Image reference attached to a `Person`; carries only a URL. */
export interface Image {
url: string
}
/**
 * Response body type used for the company search request
 * (`PerigonClient.searchCompanies`).
 */
export interface CompanySearchResponse {
status: number
numResults: number
results: Company[]
}
/**
 * A single company entry in `CompanySearchResponse.results`.
 *
 * NOTE(review): several fields are typed as `any`; their actual shapes are
 * not visible here.
 */
export interface Company {
id: string
name: string
altNames: string[]
domains: string[]
monthlyVisits: number
// Optional: may be absent from a company record.
globalRank?: number
description: string
ceo: any
industry: string
sector: any
country: string
// Optional: may be absent from a company record.
fullTimeEmployees?: number
address: any
city: any
state: any
zip: any
// Optional: may be absent from a company record.
logo?: string
favicon?: string
isEtf: boolean
isActivelyTrading: any
isFund: boolean
isAdr: boolean
symbols: any[]
}
}
/**
* @see https://www.goperigon.com
* **The intelligent news API**
*
* Real-time global news and web content data from 140,000+ sources.
*
* - search news articles
* - search news stories (clusters of related news articles)
* - search people, companies, topics, and journalists
*
* @see https://www.goperigon.com/products/news-api
*/
export class PerigonClient {
export class PerigonClient extends AIFunctionsProvider {
readonly ky: KyInstance
readonly apiKey: string
readonly _maxPageSize = 10
constructor({
apiKey = getEnv('PERIGON_API_KEY'),
@ -277,6 +643,7 @@ export class PerigonClient {
apiKey,
'PerigonClient missing required "apiKey" (defaults to "PERIGON_API_KEY")'
)
super()
this.apiKey = apiKey
@ -288,29 +655,189 @@ export class PerigonClient {
})
}
async articles(options: perigon.ArticlesOptions) {
/**
* @see https://docs.goperigon.com/reference/all-news
*/
@aiFunction({
name: 'search_news_articles',
description:
'Search for news articles indexed by Perigon. Articles can optionally be filtered by various parameters.',
inputSchema: perigon.ArticlesSearchOptionsSchema.pick({
q: true,
title: true,
from: true,
to: true,
source: true,
sourceGroup: true,
excludeSource: true,
category: true,
personName: true,
companyName: true,
companyDomain: true,
sortBy: true
})
})
async searchArticles(opts: perigon.ArticlesSearchOptions) {
const {
personWikidataId,
personName,
companyId,
companyDomain,
companySymbol,
...params
} = opts
const arrayParams = pruneUndefined({
personWikidataId: personWikidataId?.join(','),
personName: personName?.join(','),
companyId: companyId?.join(','),
companyDomain: companyDomain?.join(','),
companySymbol: companySymbol?.join(',')
})
return this.ky
.get('all', {
// @ts-expect-error there are multiple query params that array of strings
// and KY SearchParamsOption shows a TS error for those types
searchParams: {
...arrayParams,
...params,
apiKey: this.apiKey,
...options,
size: Math.min(this._maxPageSize, options.size || this._maxPageSize)
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
}
})
.json<perigon.ArticlesResponse>()
.json<perigon.ArticlesSearchResponse>()
}
async stories(options: perigon.StoriesOptions) {
/**
* @see https://docs.goperigon.com/reference/stories-1
*/
@aiFunction({
name: 'search_news_stories',
description:
'Search for news stories indexed by Perigon. Stories are clusters of related news articles and are useful for finding top stories and trending headlines. Stories can optionally be filtered by various parameters.',
inputSchema: perigon.StoriesSearchOptionsSchema.pick({
q: true,
name: true,
from: true,
to: true,
sourceGroup: true,
category: true,
personName: true,
companyName: true,
companyDomain: true,
sortBy: true
})
})
async searchStories(opts: perigon.StoriesSearchOptions) {
const {
personWikidataId,
personName,
companyId,
companyDomain,
companySymbol,
...params
} = opts
const arrayParams = pruneUndefined({
personWikidataId: personWikidataId?.join(','),
personName: personName?.join(','),
companyId: companyId?.join(','),
companyDomain: companyDomain?.join(','),
companySymbol: companySymbol?.join(',')
})
return this.ky
.get('stories/all', {
searchParams: {
...arrayParams,
...params,
apiKey: this.apiKey,
...options,
size: Math.min(this._maxPageSize, options.size || this._maxPageSize)
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
}
})
.json<perigon.StoriesResponse>()
.json<perigon.StoriesSearchResponse>()
}
/**
 * Searches people via the `people/all` endpoint.
 *
 * The array-valued filters (`wikidataId`, `occupationId`) are sent as
 * comma-joined strings; the page size is clamped to the range
 * [1, perigon.MAX_PAGE_SIZE] and falls back to perigon.DEFAULT_PAGE_SIZE
 * when no (or a falsy) size is given.
 *
 * @see https://docs.goperigon.com/docs/people-data
 * @see https://docs.goperigon.com/reference/people
 */
@aiFunction({
  name: 'search_people',
  description: 'Search for well-known people indexed by Perigon.',
  inputSchema: perigon.PeopleSearchOptionsSchema
})
async searchPeople(opts: perigon.PeopleSearchOptions) {
  const { wikidataId, occupationId, ...scalarFilters } = opts

  // Array filters are flattened to comma-separated strings; entries that
  // were not supplied are pruned.
  const joinedFilters = pruneUndefined({
    wikidataId: wikidataId?.join(','),
    occupationId: occupationId?.join(',')
  })

  const requestedSize = opts.size || perigon.DEFAULT_PAGE_SIZE
  const pageSize = Math.max(1, Math.min(perigon.MAX_PAGE_SIZE, requestedSize))

  const request = this.ky.get('people/all', {
    searchParams: {
      ...joinedFilters,
      ...scalarFilters,
      apiKey: this.apiKey,
      size: pageSize
    }
  })

  return request.json<perigon.PeopleSearchResponse>()
}
/**
 * Searches companies via the `companies/all` endpoint.
 *
 * The array-valued filters (`id`, `domain`, `symbol`) are sent as
 * comma-joined strings; the page size is clamped to the range
 * [1, perigon.MAX_PAGE_SIZE] and falls back to perigon.DEFAULT_PAGE_SIZE
 * when no (or a falsy) size is given.
 *
 * @see https://docs.goperigon.com/docs/company-data
 * @see https://docs.goperigon.com/reference/companies
 */
@aiFunction({
  name: 'search_companies',
  description:
    'Search for companies indexed by Perigon. Includes public and private companies sourced from public records and Wikidata.',
  inputSchema: perigon.CompanySearchOptionsSchema
})
async searchCompanies(opts: perigon.CompanySearchOptions) {
  const { id, symbol, domain, ...scalarFilters } = opts

  // Array filters are flattened to comma-separated strings; entries that
  // were not supplied are pruned.
  const joinedFilters = pruneUndefined({
    id: id?.join(','),
    domain: domain?.join(','),
    symbol: symbol?.join(',')
  })

  const requestedSize = opts.size || perigon.DEFAULT_PAGE_SIZE
  const pageSize = Math.max(1, Math.min(perigon.MAX_PAGE_SIZE, requestedSize))

  const request = this.ky.get('companies/all', {
    searchParams: {
      ...joinedFilters,
      ...scalarFilters,
      apiKey: this.apiKey,
      size: pageSize
    }
  })

  return request.json<perigon.CompanySearchResponse>()
}
}