chatgpt-api/src/services/perigon-client.ts

795 wiersze
22 KiB
TypeScript

import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, sanitizeSearchParams, throttleKy } from '../utils.js'
// TODO: https://docs.goperigon.com/docs/searching-sources
// TODO: https://docs.goperigon.com/docs/journalist-data
// TODO: https://docs.goperigon.com/docs/topics
export namespace perigon {
// Allow up to 2 requests per second by default.
export const throttle = pThrottle({
limit: 2,
interval: 1000,
strict: true
})
export const DEFAULT_PAGE_SIZE = 10
export const MAX_PAGE_SIZE = 100
export const ArticleLabelSchema = z.union([
z.literal('Opinion'),
z.literal('Non-news'),
z.literal('Paid News'),
z.literal('Fact Check'),
z.literal('Pop Culture'),
z.literal('Roundup'),
z.literal('Press Release')
])
export type ArticleLabel = z.infer<typeof ArticleLabelSchema>
export const CategoriesSchema = z.union([
z.literal('Politics'),
z.literal('Tech'),
z.literal('Sports'),
z.literal('Business'),
z.literal('Finance'),
z.literal('Entertainment'),
z.literal('Health'),
z.literal('Weather'),
z.literal('Lifestyle'),
z.literal('Auto'),
z.literal('Science'),
z.literal('Travel'),
z.literal('Environment'),
z.literal('World'),
z.literal('General'),
z.literal('none')
])
export type Categories = z.infer<typeof CategoriesSchema>
export const SourceGroupSchema = z.union([
z.literal('top10').describe('Top 10 most popular sources globally'),
z.literal('top100').describe('Top 100 most popular sources globally'),
z
.literal('top500English')
.describe('Top 500 most popular (English) sources globally'),
z
.literal('top25crypto')
.describe(
'Top 25 most popular sources covering cryptocurrency & blockchain developments'
),
z
.literal('top25finance')
.describe(
'Top 25 most popular sources covering financial news, movement in the markets & public equities'
),
z
.literal('top50tech')
.describe('Top 50 sources covering new technology & businesses in tech'),
z
.literal('top100sports')
.describe(
'Top 100 most popular (English) sources covering sports of all types'
),
z
.literal('top100leftUS')
.describe(
'Top 100 most popular (US) sources with an average political bias rating of Left or Leans Left'
),
z
.literal('top100rightUS')
.describe(
'Top 100 most popular (US) sources with an average political bias rating of Right or Leans Right'
),
z
.literal('top100centerUS')
.describe(
'Top 100 most popular (US) sources with an average political bias rating of Center or Middle'
)
])
export type SourceGroup = z.infer<typeof SourceGroupSchema>
export const SortBySchema = z.union([
z.literal('date'),
z.literal('relevance'),
z.literal('addDate'),
z.literal('pubDate'),
z.literal('refreshDate')
])
export type SortBy = z.infer<typeof SortBySchema>
export const ArticlesSearchOptionsSchema = z.object({
q: z.string()
.describe(`Search query. It may use boolean operators (AND, OR, NOT) and quotes for exact matching. Example search queries:
- election news
- "elon musk" AND tesla
- (upcoming release OR launch) AND apple
- (Google OR Amazon) AND NOT ("Jeff Bezos" OR Android)
- "climate change"
`),
title: z
.string()
.optional()
.describe(
'Search query which applies only to article titles / headlines.'
),
desc: z.string().optional(),
content: z.string().optional(),
url: z.string().optional(),
from: z
.string()
.optional()
.describe(
'Filter to only return articles published after the specified date (ISO or "yyyy-mm-dd" format)'
),
to: z
.string()
.optional()
.describe(
'Filter to only return articles published before the specified date (ISO or "yyyy-mm-dd" format)'
),
addDateFrom: z.string().optional(),
addDateTo: z.string().optional(),
refreshDateFrom: z.string().optional(),
refreshDateTo: z.string().optional(),
articleId: z.string().optional(),
clusterId: z.string().optional(),
medium: z.union([z.literal('article'), z.literal('video')]).optional(),
source: z
.string()
.optional()
.describe("Filter articles from a specific publisher's source domain."),
sourceGroup: SourceGroupSchema.optional().describe(
'The source group to retrieve articles from.'
),
excludeSource: z
.string()
.optional()
.describe(
'Source website domains which should be excluded from the search. Wildcards (* and ?) are suported (e.g. "*.cnn.com").'
),
paywall: z
.boolean()
.optional()
.describe(
'Filter to show only results where the source has a paywall (true) or does not have a paywall (false).'
),
country: z
.string()
.optional()
.describe('Country code to filter by country.'),
language: z.string().optional(),
label: ArticleLabelSchema.optional().describe(
'Labels to filter by, could be "Opinion", "Paid-news", "Non-news", etc. If multiple parameters are passed, they will be applied as OR operations.'
),
excludeLabel: z
.union([ArticleLabelSchema, z.literal('Low Content')])
.optional()
.describe(
'Exclude results that include specific labels ("Opinion", "Non-news", "Paid News", etc.). You can filter multiple by repeating the parameter.'
),
byline: z.string().optional(),
topic: z.string().optional(),
category: CategoriesSchema.optional().describe(
'Filter by categories. Categories are general themes that the article is about. Examples of categories: Tech, Politics, etc. If multiple parameters are passed, they will be applied as OR operations. Use "none" to search uncategorized articles.'
),
journalistId: z.string().optional(),
state: z
.string()
.optional()
.describe(
'Filters articles where a specified state plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the state in question.'
),
city: z
.string()
.optional()
.describe(
'Filters articles where a specified city plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the urban area in question.'
),
area: z
.string()
.optional()
.describe(
'Filters articles where a specified area, such as a neighborhood, borough, or district, plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the area in question.'
),
location: z.string().optional(),
sortBy: SortBySchema.default('relevance')
.optional()
.describe('How to sort the article results.'),
showReprints: z
.boolean()
.optional()
.describe(
'Whether to return reprints in the response or not. Reprints are usually wired articles from sources like AP or Reuters that are reprinted in multiple sources at the same time. By default, this parameter is "true".'
),
showNumResults: z.boolean().optional(),
type: z
.union([z.literal('all'), z.literal('local'), z.literal('world')])
.optional(),
linkTo: z.string().optional(),
reprintGroupId: z.string().optional(),
personWikidataId: z.array(z.string()).optional(),
personName: z
.array(z.string())
.optional()
.describe('List of person names for exact matches.'),
companyId: z.array(z.string()).optional(),
companyName: z.string().optional().describe('Search by company name.'),
companyDomain: z
.array(z.string())
.optional()
.describe('Search by company domain.'),
companySymbol: z
.array(z.string())
.optional()
.describe('Search by company stock ticker symbol.'),
maxDistance: z.number().optional(),
lat: z.number().optional(),
lon: z.number().optional(),
searchTranslation: z
.boolean()
.optional()
.describe(
'Expand a query to search the translation, translatedTitle, and translatedDescription fields for non-English articles.'
),
page: z
.number()
.int()
.positive()
.max(10_000)
.default(0)
.optional()
.describe('Page number of results to return (zero-based)'),
size: z
.number()
.int()
.positive()
.max(DEFAULT_PAGE_SIZE)
.optional()
.describe('Number of results to return per page')
})
export type ArticlesSearchOptions = z.infer<
typeof ArticlesSearchOptionsSchema
>
export const StoriesSearchOptionsSchema = ArticlesSearchOptionsSchema.pick({
q: true,
clusterId: true,
topic: true,
category: true,
from: true,
to: true,
state: true,
city: true,
area: true,
showNumResults: true,
page: true,
size: true,
sourceGroup: true,
personWikidataId: true,
personName: true,
companyId: true,
companyName: true,
companyDomain: true,
companySymbol: true
}).extend({
name: z.string().optional().describe('Search stories by name.'),
nameExists: z.boolean().optional(),
initializedFrom: z.string().optional(),
initializedTo: z.string().optional(),
updatedFrom: z.string().optional(),
updatedTo: z.string().optional(),
minClusterSize: z.number().optional(),
maxClusterSize: z.number().optional(),
showDuplicates: z
.boolean()
.optional()
.describe(
'Stories are deduplicated by default. If a story is deduplicated, all future articles are merged into the original story. `duplicateOf` field contains the original cluster id. When showDuplicates=true, all stories are shown.'
),
sortBy: z
.union([
z.literal('count'),
z.literal('createdAt'),
z.literal('updatedAt')
])
.optional()
.describe('How to sort the results.')
})
export type StoriesSearchOptions = z.infer<typeof StoriesSearchOptionsSchema>
export const PeopleSearchOptionsSchema = z.object({
name: z
.string()
.describe(
'Person name query to search for. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
wikidataId: z
.array(z.string())
.optional()
.describe('Search by ID of Wikidata entity.'),
occupationId: z
.array(z.string())
.optional()
.describe('Search by Wikidata occupation ID.'),
occupationLabel: z
.string()
.optional()
.describe('Search by occupation name.'),
size: z
.number()
.int()
.positive()
.max(DEFAULT_PAGE_SIZE)
.optional()
.describe('Number of results to return per page')
})
export type PeopleSearchOptions = z.infer<typeof PeopleSearchOptionsSchema>
export const CompanySearchOptionsSchema = z.object({
q: z
.string()
.optional()
.describe(
'Company search query. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
name: z
.string()
.optional()
.describe(
'Search by company name. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
industry: z
.string()
.optional()
.describe(
'Search by company industry. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
sector: z
.string()
.optional()
.describe(
'Search by company sector. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
),
id: z.array(z.string()).optional().describe('Search by company ID.'),
symbol: z
.array(z.string())
.optional()
.describe('Search by company stock ticker symbol.'),
domain: z
.array(z.string())
.optional()
.describe('Search by company domain.'),
country: z.string().optional().describe('Search by country.'),
exchange: z.string().optional().describe('Search by exchange name.'),
numEmployeesFrom: z
.number()
.int()
.positive()
.optional()
.describe('Minimum number of employees.'),
numEmployeesTo: z
.number()
.int()
.positive()
.optional()
.describe('Maximum number of employees.'),
ipoFrom: z
.string()
.optional()
.describe('Starting IPO date (ISO or "yyyy-mm-dd" format)'),
ipoTo: z
.string()
.optional()
.describe('Ending IPO date (ISO or "yyyy-mm-dd" format)'),
size: z
.number()
.int()
.positive()
.max(DEFAULT_PAGE_SIZE)
.optional()
.describe('Number of results to return per page')
})
export type CompanySearchOptions = z.infer<typeof CompanySearchOptionsSchema>
export type ArticlesSearchResponse = {
status: number
numResults: number
articles: Article[]
}
export type Article = {
url: string
authorsByline: string
articleId: string
clusterId: string
source: {
domain: string
}
imageUrl: string
country: string
language: string
pubDate: string
addDate: string
refreshDate: string
score: number
title: string
description: string
content: string
medium: string
links: string[]
labels: string[]
matchedAuthors: string[]
claim: string
verdict: string
keywords: {
name: string
weight: number
}[]
topics: {
name: string
}[]
categories: {
name: string
}[]
entities: {
data: string
type: string
mentions: number
}[]
sentiment: {
positive: number
negative: number
neutral: number
}
summary: string
translation: string
locations: string[]
reprint: boolean
reprintGroupId: string
places: null
}
export type StoriesSearchResponse = {
status: number
numResults: number
results: Story[]
}
export type Story = {
createdAt: string
updatedAt: string
initializedAt: string
id: string
name: string
summary: string
summaryReferences: Array<any>
keyPoints: Array<{
point: string
references: Array<string>
}>
sentiment: {
positive: number
negative: number
neutral: number
}
uniqueCount: number
reprintCount: number
totalCount: number
countries: Array<{
name: string
count: number
}>
topCountries: Array<string>
topics: Array<{
name: string
count: number
}>
topTopics: Array<{ name: string }>
categories: Array<{
name: string
count: number
}>
topCategories: Array<{ name: string }>
people: Array<{ wikidataId: string; name: string; count: number }>
topPeople: Array<{ wikidataId: string; name: string }>
companies: Array<{
id: string
name: string
domains: Array<string>
symbols: Array<string>
count: number
}>
topCompanies: Array<{
id: string
name: string
domains: Array<string>
symbols: Array<string>
}>
locations: Array<{
state: string
city?: string
area?: string
county?: string
count: number
}>
topLocations: Array<{
state: string
city?: string
area?: string
county?: string
}>
}
export interface PeopleSearchResponse {
status: number
numResults: number
results: Person[]
}
export interface Person {
wikidataId: string
name: string
gender: Gender
dateOfBirth: DateOfBirth
dateOfDeath: any
description: string
aliases: string[]
occupation: Occupation[]
position: Position[]
politicalParty: PoliticalParty[]
image?: Image
abstract: string
}
export interface Gender {
wikidataId: string
label: string
}
export interface DateOfBirth {
time: string
precision: string
}
export interface Occupation {
wikidataId: string
label: string
}
export interface Position {
wikidataId: string
label: string
startTime: any
endTime: any
employer: any
}
export interface PoliticalParty {
wikidataId: string
label: string
startTime: any
endTime: any
}
export interface Image {
url: string
}
export interface CompanySearchResponse {
status: number
numResults: number
results: Company[]
}
export interface Company {
id: string
name: string
altNames: string[]
domains: string[]
monthlyVisits: number
globalRank?: number
description: string
ceo: any
industry: string
sector: any
country: string
fullTimeEmployees?: number
address: any
city: any
state: any
zip: any
logo?: string
favicon?: string
isEtf: boolean
isActivelyTrading: any
isFund: boolean
isAdr: boolean
symbols: any[]
}
}
/**
* **The intelligent news API**
*
* Real-time global news and web content data from 140,000+ sources.
*
* - search news articles
* - search news stories (clusters of related news articles)
* - search people, companies, topics, and journalists
*
* @see https://www.goperigon.com/products/news-api
*/
export class PerigonClient extends AIFunctionsProvider {
protected readonly ky: KyInstance
protected readonly apiKey: string
constructor({
apiKey = getEnv('PERIGON_API_KEY'),
timeoutMs = 30_000,
throttle = true,
ky = defaultKy
}: {
apiKey?: string
apiBaseUrl?: string
throttle?: boolean
timeoutMs?: number
ky?: KyInstance
} = {}) {
assert(
apiKey,
'PerigonClient missing required "apiKey" (defaults to "PERIGON_API_KEY")'
)
super()
this.apiKey = apiKey
const throttledKy = throttle ? throttleKy(ky, perigon.throttle) : ky
this.ky = throttledKy.extend({
prefixUrl: 'https://api.goperigon.com/v1/',
timeout: timeoutMs
})
}
/**
* @see https://docs.goperigon.com/docs/overview
* @see https://docs.goperigon.com/reference/all-news
*/
@aiFunction({
name: 'search_news_articles',
description:
'Search for news articles indexed by Perigon. Articles can optionally be filtered by various parameters.',
inputSchema: perigon.ArticlesSearchOptionsSchema.pick({
q: true,
title: true,
from: true,
to: true,
source: true,
sourceGroup: true,
excludeSource: true,
category: true,
personName: true,
companyName: true,
companyDomain: true,
sortBy: true
})
})
async searchArticles(opts: perigon.ArticlesSearchOptions) {
return this.ky
.get('all', {
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
})
})
.json<perigon.ArticlesSearchResponse>()
}
/**
* @see https://docs.goperigon.com/docs/stories-overview
* @see https://docs.goperigon.com/reference/stories-1
*/
@aiFunction({
name: 'search_news_stories',
description:
'Search for news stories indexed by Perigon. Stories are clusters of related news articles and are useful for finding top stories and trending headlines. Stories can optionally be filtered by various parameters.',
inputSchema: perigon.StoriesSearchOptionsSchema.pick({
q: true,
name: true,
from: true,
to: true,
sourceGroup: true,
category: true,
personName: true,
companyName: true,
companyDomain: true,
sortBy: true
})
})
async searchStories(opts: perigon.StoriesSearchOptions) {
return this.ky
.get('stories/all', {
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
})
})
.json<perigon.StoriesSearchResponse>()
}
/**
* @see https://docs.goperigon.com/docs/people-data
* @see https://docs.goperigon.com/reference/people
*/
@aiFunction({
name: 'search_people',
description: 'Search for well-known people indexed by Perigon.',
inputSchema: perigon.PeopleSearchOptionsSchema
})
async searchPeople(opts: perigon.PeopleSearchOptions) {
return this.ky
.get('people/all', {
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
})
})
.json<perigon.PeopleSearchResponse>()
}
/**
* @see https://docs.goperigon.com/docs/company-data
* @see https://docs.goperigon.com/reference/companies
*/
@aiFunction({
name: 'search_companies',
description:
'Search for companies indexed by Perigon. Includes public and private companies sourced from public records and Wikidata.',
inputSchema: perigon.CompanySearchOptionsSchema
})
async searchCompanies(opts: perigon.CompanySearchOptions) {
return this.ky
.get('companies/all', {
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
})
})
.json<perigon.CompanySearchResponse>()
}
}