feat: add sanitizeSearchParams

pull/643/head^2
Travis Fischer 2024-06-03 12:26:46 -05:00
rodzic 94198f318b
commit ed85b708fd
8 zmienionych plików z 258 dodań i 255 usunięć

Wyświetl plik

@ -62,6 +62,7 @@
- provide a converter for langchain `DynamicStructuredTool`
- pull from other libs
- pull from [nango](https://docs.nango.dev/integrations/overview)
- https://github.com/causaly/zod-validation-error
## License

Wyświetl plik

@ -0,0 +1,15 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`sanitizeSearchParams 1`] = `"a=1&c=13"`;
exports[`sanitizeSearchParams 2`] = `"a=1&a=2&a=3"`;
exports[`sanitizeSearchParams 3`] = `"b=a&b=b&foo=true"`;
exports[`sanitizeSearchParams 4`] = `"b=false&b=true&b=false"`;
exports[`sanitizeSearchParams 5`] = `"flag=foo&flag=bar&flag=baz&token=test"`;
exports[`sanitizeSearchParams 6`] = `""`;
exports[`sanitizeSearchParams 7`] = `""`;

Wyświetl plik

@ -12,9 +12,6 @@ export interface PrivateAIFunctionMetadata {
description: string
inputSchema: z.AnyZodObject
methodName: string
// TODO
// pre and post
}
export abstract class AIFunctionsProvider {

Wyświetl plik

@ -3,7 +3,13 @@ import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, throttleKy } from '../utils.js'
import {
assert,
getEnv,
omit,
sanitizeSearchParams,
throttleKy
} from '../utils.js'
export namespace diffbot {
export const API_BASE_URL = 'https://api.diffbot.com'
@ -373,17 +379,17 @@ export namespace diffbot {
allUris?: string[]
// extra metadata
nbOrigins?: number
nbIncomingEdges?: number
nbFollowers?: number
educations?: Education[]
nationalities?: Nationality[]
allNames?: string[]
skills?: Skill[]
children?: Children[]
nbOrigins?: number
height?: number
image?: string
images?: Image[]
nbIncomingEdges?: number
nbFollowers?: number
allOriginHashes?: string[]
nameDetail?: NameDetail
parents?: Parent[]
@ -638,6 +644,18 @@ export namespace diffbot {
name: string
type: string
}
/**
 * Strips a fixed set of keys from a Diffbot entity by delegating to `omit`:
 * `allOriginHashes`, `locations`, `images`, `nationalities`, `awards` and
 * `interests`.
 *
 * NOTE(review): presumably these fields are dropped because they are bulky
 * and low-signal for callers — confirm intent. Whether `entity` itself is
 * left untouched depends on the `omit` helper; verify against its
 * implementation.
 *
 * @param entity - The Diffbot entity to prune.
 * @returns Whatever `omit` returns for `entity` with the listed keys removed.
 */
export function pruneEntity(entity: diffbot.Entity) {
return omit(
entity,
'allOriginHashes',
'locations',
'images',
'nationalities',
'awards',
'interests'
)
}
}
export class DiffbotClient extends AIFunctionsProvider {
@ -713,7 +731,7 @@ export class DiffbotClient extends AIFunctionsProvider {
@aiFunction({
name: 'diffbot_enhance_entity',
description:
'Enriches a person or organization entity given partial data. Enhance is an enrichment API to find a person or organization using partial data as input. Enhance scores several candidates against the submitted query and returns the best match. More information in the query helps Enhance models estimate with more confidence and will typically result in better matches and a higher score for the matches.',
'Resolves and enriches a partial person or organization entity.',
inputSchema: diffbot.EnhanceEntityOptionsSchema.omit({
refresh: true,
search: true,
@ -722,25 +740,16 @@ export class DiffbotClient extends AIFunctionsProvider {
})
})
async enhanceEntity(opts: diffbot.EnhanceEntityOptions) {
const { name, url, ...params } = opts
// TODO: clean this array handling up...
const arraySearchParams = [
name ? (Array.isArray(name) ? name : [name]).map((v) => ['name', v]) : [],
url?.map((v) => ['url', v])
]
.filter(Boolean)
.flat()
return this.kyKnowledgeGraph
const res = await this.kyKnowledgeGraph
.get('kg/v3/enhance', {
searchParams: new URLSearchParams([
...arraySearchParams,
...Object.entries(params).map(([key, value]) => [key, String(value)]),
['token', this.apiKey]
])
searchParams: sanitizeSearchParams({
...opts,
token: this.apiKey
})
})
.json<diffbot.EnhanceEntityResponse>()
return res.data.map((datum) => diffbot.pruneEntity(datum.entity))
}
async searchKnowledgeGraph(options: diffbot.KnowledgeGraphSearchOptions) {
@ -769,10 +778,10 @@ export class DiffbotClient extends AIFunctionsProvider {
T extends diffbot.ExtractResponse = diffbot.ExtractResponse
>(endpoint: string, options: diffbot.ExtractOptions): Promise<T> {
const { customJs, customHeaders, ...rest } = options
const searchParams: Record<string, any> = {
const searchParams = sanitizeSearchParams({
...rest,
token: this.apiKey
}
})
const headers = {
...Object.fromEntries(
[['X-Forward-X-Evaluate', customJs]].filter(([, value]) => value)
@ -780,12 +789,6 @@ export class DiffbotClient extends AIFunctionsProvider {
...customHeaders
}
for (const [key, value] of Object.entries(rest)) {
if (Array.isArray(value)) {
searchParams[key] = value.join(',')
}
}
// console.log(`DiffbotClient._extract: ${endpoint}`, searchParams)
return this.ky

Wyświetl plik

@ -3,7 +3,7 @@ import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, pruneUndefined, throttleKy } from '../utils.js'
import { assert, getEnv, sanitizeSearchParams, throttleKy } from '../utils.js'
// TODO: https://docs.goperigon.com/docs/searching-sources
// TODO: https://docs.goperigon.com/docs/journalist-data
@ -683,28 +683,10 @@ export class PerigonClient extends AIFunctionsProvider {
})
})
async searchArticles(opts: perigon.ArticlesSearchOptions) {
const {
personWikidataId,
personName,
companyId,
companyDomain,
companySymbol,
...params
} = opts
const arrayParams = pruneUndefined({
personWikidataId: personWikidataId?.join(','),
personName: personName?.join(','),
companyId: companyId?.join(','),
companyDomain: companyDomain?.join(','),
companySymbol: companySymbol?.join(',')
})
return this.ky
.get('all', {
searchParams: {
...arrayParams,
...params,
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
@ -713,7 +695,7 @@ export class PerigonClient extends AIFunctionsProvider {
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
}
})
})
.json<perigon.ArticlesSearchResponse>()
}
@ -740,28 +722,10 @@ export class PerigonClient extends AIFunctionsProvider {
})
})
async searchStories(opts: perigon.StoriesSearchOptions) {
const {
personWikidataId,
personName,
companyId,
companyDomain,
companySymbol,
...params
} = opts
const arrayParams = pruneUndefined({
personWikidataId: personWikidataId?.join(','),
personName: personName?.join(','),
companyId: companyId?.join(','),
companyDomain: companyDomain?.join(','),
companySymbol: companySymbol?.join(',')
})
return this.ky
.get('stories/all', {
searchParams: {
...arrayParams,
...params,
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
@ -770,7 +734,7 @@ export class PerigonClient extends AIFunctionsProvider {
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
}
})
})
.json<perigon.StoriesSearchResponse>()
}
@ -785,18 +749,10 @@ export class PerigonClient extends AIFunctionsProvider {
inputSchema: perigon.PeopleSearchOptionsSchema
})
async searchPeople(opts: perigon.PeopleSearchOptions) {
const { wikidataId, occupationId, ...params } = opts
const arrayParams = pruneUndefined({
wikidataId: wikidataId?.join(','),
occupationId: occupationId?.join(',')
})
return this.ky
.get('people/all', {
searchParams: {
...arrayParams,
...params,
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
@ -805,7 +761,7 @@ export class PerigonClient extends AIFunctionsProvider {
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
}
})
})
.json<perigon.PeopleSearchResponse>()
}
@ -821,19 +777,10 @@ export class PerigonClient extends AIFunctionsProvider {
inputSchema: perigon.CompanySearchOptionsSchema
})
async searchCompanies(opts: perigon.CompanySearchOptions) {
const { id, symbol, domain, ...params } = opts
const arrayParams = pruneUndefined({
id: id?.join(','),
domain: domain?.join(','),
symbol: symbol?.join(',')
})
return this.ky
.get('companies/all', {
searchParams: {
...arrayParams,
...params,
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
@ -842,7 +789,7 @@ export class PerigonClient extends AIFunctionsProvider {
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
}
})
})
.json<perigon.CompanySearchResponse>()
}

Wyświetl plik

@ -4,7 +4,15 @@ import { z } from 'zod'
import type { DeepNullable } from '../types.js'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, pruneUndefined, throttleKy } from '../utils.js'
import {
assert,
getEnv,
pruneUndefined,
sanitizeSearchParams,
throttleKy
} from '../utils.js'
// TODO: improve `domain` validation for fast-fail
export namespace predictleads {
// Allow up to 20 requests per minute by default.
@ -188,118 +196,124 @@ export namespace predictleads {
export type JobOpeningByIdResponse = Omit<JobOpeningResponse, 'meta'>
export const EventCategorySchema = z.union([
z
.literal('hires')
.describe(
'Company hired new executive or senior personnel. (leadership)'
),
z
.literal('promotes')
.describe(
'Company promoted existing executive or senior personnel. (leadership)'
),
z
.literal('leaves')
.describe('Executive or senior personnel left the company. (leadership)'),
z
.literal('retires')
.describe(
'Executive or senior personnel retires from the company. (leadership)'
),
z
.literal('acquires')
.describe('Company acquired other company. (acquisition)'),
z
.literal('merges_with')
.describe('Company merges with other company. (acquisition)'),
z
.literal('sells_assets_to')
.describe(
'Company sells assets (like properties or warehouses) to other company. (acquisition)'
),
z
.literal('expands_offices_to')
.describe(
'Company opens new offices in another town, state, country or continent. (expansion)'
),
z
.literal('expands_offices_in')
.describe('Company expands existing offices. (expansion)'),
z
.literal('expands_facilities')
.describe(
'Company opens new or expands existing facilities like warehouses, data centers, manufacturing plants etc. (expansion)'
),
z
.literal('opens_new_location')
.describe(
'Company opens new service location like hotels, restaurants, bars, hospitals etc. (expansion)'
),
z
.literal('increases_headcount_by')
.describe('Company offers new job vacancies. (expansion)'),
z
.literal('launches')
.describe('Company launches new offering. (new_offering)'),
z
.literal('integrates_with')
.describe('Company integrates with other company. (new_offering)'),
z
.literal('is_developing')
.describe('Company begins development of a new offering. (new_offering)'),
z
.literal('receives_financing')
.describe(
'Company receives investment like venture funding, loan, grant etc. (investment)'
),
z
.literal('invests_into')
.describe('Company invests into other company. (investment)'),
z
.literal('invests_into_assets')
.describe(
'Company invests into assets like property, trucks, facilities etc. (investment)'
),
z
.literal('goes_public')
.describe(
'Company issues shares to the public for the first time. (investment)'
),
z
.literal('closes_offices_in')
.describe('Company closes existing offices. (cost_cutting)'),
z
.literal('decreases_headcount_by')
.describe('Company lays off employees. (cost_cutting)'),
z
.literal('partners_with')
.describe('Company partners with other company. (partnership)'),
z
.literal('receives_award')
.describe(
'Company or person at the company receives an award. (recognition)'
),
z
.literal('recognized_as')
.describe(
'Company or person at the company receives recognition. (recognition)'
),
z
.literal('signs_new_client')
.describe('Company signs new client. (contract)'),
z
.literal('files_suit_against')
.describe(
'Company files suit against other company. (corporate_challenges)'
),
z
.literal('has_issues_with')
.describe('Company has vulnerability problems. (corporate_challenges)'),
z
.literal('identified_as_competitor_of')
.describe('New or existing competitor was identified. (relational)')
])
export const EventCategorySchema = z
.union([
z
.literal('hires')
.describe(
'Company hired new executive or senior personnel. (leadership)'
),
z
.literal('promotes')
.describe(
'Company promoted existing executive or senior personnel. (leadership)'
),
z
.literal('leaves')
.describe(
'Executive or senior personnel left the company. (leadership)'
),
z
.literal('retires')
.describe(
'Executive or senior personnel retires from the company. (leadership)'
),
z
.literal('acquires')
.describe('Company acquired other company. (acquisition)'),
z
.literal('merges_with')
.describe('Company merges with other company. (acquisition)'),
z
.literal('sells_assets_to')
.describe(
'Company sells assets (like properties or warehouses) to other company. (acquisition)'
),
z
.literal('expands_offices_to')
.describe(
'Company opens new offices in another town, state, country or continent. (expansion)'
),
z
.literal('expands_offices_in')
.describe('Company expands existing offices. (expansion)'),
z
.literal('expands_facilities')
.describe(
'Company opens new or expands existing facilities like warehouses, data centers, manufacturing plants etc. (expansion)'
),
z
.literal('opens_new_location')
.describe(
'Company opens new service location like hotels, restaurants, bars, hospitals etc. (expansion)'
),
z
.literal('increases_headcount_by')
.describe('Company offers new job vacancies. (expansion)'),
z
.literal('launches')
.describe('Company launches new offering. (new_offering)'),
z
.literal('integrates_with')
.describe('Company integrates with other company. (new_offering)'),
z
.literal('is_developing')
.describe(
'Company begins development of a new offering. (new_offering)'
),
z
.literal('receives_financing')
.describe(
'Company receives investment like venture funding, loan, grant etc. (investment)'
),
z
.literal('invests_into')
.describe('Company invests into other company. (investment)'),
z
.literal('invests_into_assets')
.describe(
'Company invests into assets like property, trucks, facilities etc. (investment)'
),
z
.literal('goes_public')
.describe(
'Company issues shares to the public for the first time. (investment)'
),
z
.literal('closes_offices_in')
.describe('Company closes existing offices. (cost_cutting)'),
z
.literal('decreases_headcount_by')
.describe('Company lays off employees. (cost_cutting)'),
z
.literal('partners_with')
.describe('Company partners with other company. (partnership)'),
z
.literal('receives_award')
.describe(
'Company or person at the company receives an award. (recognition)'
),
z
.literal('recognized_as')
.describe(
'Company or person at the company receives recognition. (recognition)'
),
z
.literal('signs_new_client')
.describe('Company signs new client. (contract)'),
z
.literal('files_suit_against')
.describe(
'Company files suit against other company. (corporate_challenges)'
),
z
.literal('has_issues_with')
.describe('Company has vulnerability problems. (corporate_challenges)'),
z
.literal('identified_as_competitor_of')
.describe('New or existing competitor was identified. (relational)')
])
.describe('Event category')
export type EventCategory = z.infer<typeof EventCategorySchema>
export const CompanyParamsSchema = z.object({
@ -535,17 +549,15 @@ export class PredictLeadsClient extends AIFunctionsProvider {
domain,
page = 1,
limit = predictleads.DEFAULT_PAGE_SIZE,
categories,
...params
} = opts
assert(domain, 'Missing required company "domain"')
return this.ky
.get(`v2/companies/${domain}/events`, {
searchParams: pruneUndefined({
searchParams: sanitizeSearchParams({
page,
limit: String(limit),
categories: categories?.join(','),
limit,
...params
})
})
@ -586,19 +598,13 @@ export class PredictLeadsClient extends AIFunctionsProvider {
) {
const opts =
typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
const {
domain,
limit = predictleads.DEFAULT_PAGE_SIZE,
categories,
...params
} = opts
const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
assert(domain, 'Missing required company "domain"')
return this.ky
.get(`v2/companies/${domain}/job_openings`, {
searchParams: pruneUndefined({
limit: String(limit),
categories: categories?.join(','),
searchParams: sanitizeSearchParams({
limit,
...params
})
})
@ -621,19 +627,13 @@ export class PredictLeadsClient extends AIFunctionsProvider {
) {
const opts =
typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
const {
domain,
limit = predictleads.DEFAULT_PAGE_SIZE,
categories,
...params
} = opts
const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
assert(domain, 'Missing required company "domain"')
return this.ky
.get(`v2/companies/${domain}/technologies`, {
searchParams: pruneUndefined({
limit: String(limit),
categories: categories?.join(','),
searchParams: sanitizeSearchParams({
limit,
...params
})
})
@ -651,19 +651,13 @@ export class PredictLeadsClient extends AIFunctionsProvider {
) {
const opts =
typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
const {
domain,
limit = predictleads.DEFAULT_PAGE_SIZE,
categories,
...params
} = opts
const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
assert(domain, 'Missing required company "domain"')
return this.ky
.get(`v2/companies/${domain}/connections`, {
searchParams: pruneUndefined({
limit: String(limit),
categories: categories?.join(','),
searchParams: sanitizeSearchParams({
limit,
...params
})
})
@ -686,7 +680,7 @@ export class PredictLeadsClient extends AIFunctionsProvider {
return this.ky
.get(`v2/companies/${domain}/website_evolution`, {
searchParams: pruneUndefined({ limit: String(limit), ...params })
searchParams: sanitizeSearchParams({ limit, ...params })
})
.json<predictleads.Response>()
}
@ -707,7 +701,7 @@ export class PredictLeadsClient extends AIFunctionsProvider {
return this.ky
.get(`v2/companies/${domain}/github_repositories`, {
searchParams: pruneUndefined({ limit: String(limit), ...params })
searchParams: sanitizeSearchParams({ limit, ...params })
})
.json<predictleads.Response>()
}
@ -723,19 +717,13 @@ export class PredictLeadsClient extends AIFunctionsProvider {
) {
const opts =
typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
const {
domain,
sources,
limit = predictleads.DEFAULT_PAGE_SIZE,
...params
} = opts
const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
assert(domain, 'Missing required company "domain"')
return this.ky
.get(`v2/companies/${domain}/products`, {
searchParams: pruneUndefined({
limit: String(limit),
sources: sources?.join(','),
searchParams: sanitizeSearchParams({
limit,
...params
})
})
@ -783,7 +771,7 @@ export class PredictLeadsClient extends AIFunctionsProvider {
async getFollowingCompanies(limit: number = predictleads.DEFAULT_PAGE_SIZE) {
return this.ky
.get(`v2/followings`, {
searchParams: { limit: String(limit) }
searchParams: sanitizeSearchParams({ limit })
})
.json<predictleads.FollowedCompaniesResponse>()
}

Wyświetl plik

@ -3,7 +3,7 @@ import pThrottle from 'p-throttle'
import { expect, test } from 'vitest'
import { mockKyInstance } from './_utils.js'
import { omit, pick, throttleKy } from './utils.js'
import { omit, pick, sanitizeSearchParams, throttleKy } from './utils.js'
test('pick', () => {
expect(pick({ a: 1, b: 2, c: 3 }, 'a', 'c')).toEqual({ a: 1, c: 3 })
@ -19,6 +19,33 @@ test('omit', () => {
).toEqual({ a: { b: 'foo' }, d: -1 })
})
test('sanitizeSearchParams', () => {
  // Table of inputs, serialized and snapshotted one by one. The order matters:
  // snapshots for a single test are numbered sequentially, so entry N here
  // corresponds to the stored `sanitizeSearchParams N` snapshot.
  const inputs: Parameters<typeof sanitizeSearchParams>[0][] = [
    { a: 1, b: undefined, c: 13 },
    { a: [1, 2, 3] },
    { b: ['a', 'b'], foo: true },
    { b: [false, true, false] },
    { flag: ['foo', 'bar', 'baz'], token: 'test' },
    {},
    { a: [] }
  ]

  for (const input of inputs) {
    const serialized = sanitizeSearchParams(input).toString()
    expect(serialized).toMatchSnapshot()
  }
})
test(
'throttleKy should rate-limit requests to ky properly',
async () => {

Wyświetl plik

@ -86,3 +86,28 @@ export function throttleKy(
}
})
}
/**
 * Creates a new `URLSearchParams` object from a plain record, coercing every
 * value to a string via `String()`.
 *
 * - `undefined` values are omitted entirely. `null` values are not part of the
 *   declared type, but are omitted as well if they slip through at runtime
 *   (instead of being serialized as the literal string `"null"`).
 * - Array values are expanded into repeated keys, e.g. `{ a: [1, 2] }` becomes
 *   `a=1&a=2`. Nullish array elements are skipped and an empty array
 *   contributes nothing.
 * - Falsy but defined values (`0`, `false`, `''`) are kept.
 * - Entries are emitted in `Object.entries` order.
 *
 * @param searchParams - Query parameter names mapped to scalar or array values.
 * @returns A new `URLSearchParams` containing only the defined values.
 */
export function sanitizeSearchParams(
  searchParams: Record<
    string,
    string | number | boolean | string[] | number[] | boolean[] | undefined
  >
): URLSearchParams {
  const result = new URLSearchParams()

  // Keys coming out of `Object.entries` are always strings, so only the value
  // needs to be checked.
  for (const [key, value] of Object.entries(searchParams)) {
    // `== null` deliberately matches both `undefined` and `null`.
    if (value == null) {
      continue
    }

    if (Array.isArray(value)) {
      for (const item of value) {
        if (item != null) {
          result.append(key, String(item))
        }
      }
    } else {
      result.append(key, String(value))
    }
  }

  return result
}