From 513a2aa294f57972d3a164ca4b58e62fc629c029 Mon Sep 17 00:00:00 2001 From: Travis Fischer Date: Sun, 9 Jun 2024 19:18:06 -0500 Subject: [PATCH] feat: minor improvements to diffbot, predictleads, and proxycurl --- legacy/package.json | 1 + legacy/src/services/diffbot-client.ts | 7 +- legacy/src/services/predict-leads-client.ts | 2 +- legacy/src/services/proxycurl-client.ts | 94 ++++++++++++++------- legacy/src/url-utils.ts | 24 ++++-- 5 files changed, 86 insertions(+), 42 deletions(-) diff --git a/legacy/package.json b/legacy/package.json index 6d9f5fbd..171a74ec 100644 --- a/legacy/package.json +++ b/legacy/package.json @@ -72,6 +72,7 @@ "scripts": { "preinstall": "npx only-allow pnpm", "build": "tsup", + "dev": "tsup --watch", "clean": "del dist", "prebuild": "run-s clean", "predev": "run-s clean", diff --git a/legacy/src/services/diffbot-client.ts b/legacy/src/services/diffbot-client.ts index 6e2db91d..2bbb39b5 100644 --- a/legacy/src/services/diffbot-client.ts +++ b/legacy/src/services/diffbot-client.ts @@ -1,3 +1,4 @@ +import type { Simplify } from 'type-fest' import defaultKy, { type KyInstance } from 'ky' import pThrottle from 'p-throttle' import { z } from 'zod' @@ -657,6 +658,8 @@ export namespace diffbot { 'interests' ) } + + export type PrunedEntity = Simplify> } /** @@ -747,7 +750,9 @@ export class DiffbotClient extends AIFunctionsProvider { threshold: true }) }) - async enhanceEntity(opts: diffbot.EnhanceEntityOptions) { + async enhanceEntity( + opts: diffbot.EnhanceEntityOptions + ): Promise { const res = await this.kyKnowledgeGraph .get('kg/v3/enhance', { searchParams: sanitizeSearchParams({ diff --git a/legacy/src/services/predict-leads-client.ts b/legacy/src/services/predict-leads-client.ts index 3973d631..b3b2fd3a 100644 --- a/legacy/src/services/predict-leads-client.ts +++ b/legacy/src/services/predict-leads-client.ts @@ -532,7 +532,7 @@ export class PredictLeadsClient extends AIFunctionsProvider { 'Returns basic information about a company given its `domain` like location, name, stock ticker, description, etc.', inputSchema: predictleads.CompanyParamsSchema }) - async company(domainOrOpts: string | predictleads.CompanyParams) { + async getCompany(domainOrOpts: string | predictleads.CompanyParams) { const opts = typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts const { domain } = opts diff --git a/legacy/src/services/proxycurl-client.ts b/legacy/src/services/proxycurl-client.ts index b83eccf0..0d184391 100644 --- a/legacy/src/services/proxycurl-client.ts +++ b/legacy/src/services/proxycurl-client.ts @@ -3,7 +3,7 @@ import pThrottle from 'p-throttle' import { z } from 'zod' import { aiFunction, AIFunctionsProvider } from '../fns.js' -import { assert, getEnv, throttleKy } from '../utils.js' +import { assert, getEnv, sanitizeSearchParams, throttleKy } from '../utils.js' // All proxycurl types are auto-generated from their openapi spec export namespace proxycurl { @@ -25,35 +25,48 @@ export namespace proxycurl { ]) export type CompanyType = z.infer + export const OptionalFieldSchema = z.enum(['exclude', 'include']).optional() + export type OptionalField = z.infer + + export const OptionalEnrichFieldSchema = z.enum(['skip', 'enrich']).optional() + export type OptionalEnrichField = z.infer + + export const UseCacheSchema = z.enum(['if-present', 'if-recent']).optional() + export type UseCache = z.infer + + export const FallbackToCacheSchema = z.enum(['on-error', 'never']).optional() + export type FallbackToCache = z.infer + export const CompanyProfileEndpointParamsQueryClassSchema = z.object({ - acquisitions: z.string().optional(), - categories: z.string().optional(), - exit_data: z.string().optional(), - extra: z.string().optional(), - fallback_to_cache: z.string().optional(), - funding_data: z.string().optional(), - resolve_numeric_id: z.string().optional(), url: z.string(), - use_cache: z.string().optional() + acquisitions: OptionalFieldSchema, + categories: OptionalFieldSchema, + exit_data: OptionalFieldSchema, + extra: OptionalFieldSchema, + funding_data: OptionalFieldSchema, + resolve_numeric_id: z.boolean().optional(), + fallback_to_cache: FallbackToCacheSchema, + use_cache: UseCacheSchema }) export type CompanyProfileEndpointParamsQueryClass = z.infer< typeof CompanyProfileEndpointParamsQueryClassSchema > export const PersonProfileEndpointParamsQueryClassSchema = z.object({ - extra: z.string().optional(), - facebook_profile_id: z.string().optional(), + // requires one of `facebook_profile_url`, `linkedin_profile_url`, or `twitter_profile_url` facebook_profile_url: z.string().optional(), - fallback_to_cache: z.string().optional(), - github_profile_id: z.string().optional(), - inferred_salary: z.string().optional(), linkedin_profile_url: z.string().optional(), - personal_contact_number: z.string().optional(), - personal_email: z.string().optional(), - skills: z.string().optional(), - twitter_profile_id: z.string().optional(), twitter_profile_url: z.string().optional(), - use_cache: z.string().optional() + facebook_profile_id: OptionalFieldSchema, + twitter_profile_id: OptionalFieldSchema, + extra: OptionalFieldSchema, + github_profile_id: OptionalFieldSchema, + inferred_salary: OptionalFieldSchema, + personal_contact_number: OptionalFieldSchema, + personal_email: OptionalFieldSchema, + skills: OptionalFieldSchema, + fallback_to_cache: FallbackToCacheSchema, + use_cache: UseCacheSchema }) export type PersonProfileEndpointParamsQueryClass = z.infer< typeof PersonProfileEndpointParamsQueryClassSchema @@ -63,12 +76,12 @@ export namespace proxycurl { company_domain: z .string() .describe('The domain URL of the company the person works at'), - enrich_profile: z.string().optional(), first_name: z.string(), last_name: z.string().optional(), location: z.string().optional(), similarity_checks: z.string().optional(), - title: z.string().optional() + title: z.string().optional(), + enrich_profile: OptionalEnrichFieldSchema }) export type PersonLookupEndpointParamsQueryClass = z.infer< typeof PersonLookupEndpointParamsQueryClassSchema @@ -77,7 +90,7 @@ export namespace proxycurl { export const RoleLookupEndpointParamsQueryClassSchema = z.object({ company_name: z.string(), role: z.string(), - enrich_profile: z.string().optional() + enrich_profile: OptionalEnrichFieldSchema }) export type RoleLookupEndpointParamsQueryClass = z.infer< typeof RoleLookupEndpointParamsQueryClassSchema @@ -87,7 +100,7 @@ export namespace proxycurl { company_domain: z.string().optional(), company_location: z.string().optional(), company_name: z.string().optional(), - enrich_profile: z.string().optional() + enrich_profile: OptionalEnrichFieldSchema }) export type CompanyLookupEndpointParamsQueryClass = z.infer< typeof CompanyLookupEndpointParamsQueryClassSchema @@ -95,7 +108,7 @@ export namespace proxycurl { export const ReverseEmailLookupEndpointParamsQueryClassSchema = z.object({ email: z.string(), - enrich_profile: z.string().optional(), + enrich_profile: OptionalEnrichFieldSchema, lookup_depth: z.string().optional() }) export type ReverseEmailLookupEndpointParamsQueryClass = z.infer< @@ -2068,7 +2081,12 @@ export class ProxycurlClient extends AIFunctionsProvider { ) { return this.ky .get('api/linkedin/company', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams({ + funding_data: 'include', + exit_data: 'include', + extra_data: 'include', + ...opts + }) }) .json() } @@ -2084,7 +2102,7 @@ export class ProxycurlClient extends AIFunctionsProvider { ) { return this.ky .get('api/v2/linkedin', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams(opts) }) .json() } @@ -2100,7 +2118,10 @@ export class ProxycurlClient extends AIFunctionsProvider { ) { return this.ky .get('api/linkedin/profile/resolve', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams({ + enrich_profile: 'enrich', + ...opts + }) }) .json() } @@ -2116,7 +2137,10 @@ export class ProxycurlClient extends AIFunctionsProvider { ) { return this.ky .get('api/linkedin/profile/resolve/email', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams({ + enrich_profile: 'enrich', + ...opts + }) }) .json() } @@ -2132,7 +2156,10 @@ export class ProxycurlClient extends AIFunctionsProvider { ) { return this.ky .get('api/find/company/role/', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams({ + enrich_profile: 'enrich', + ...opts + }) }) .json() } @@ -2148,7 +2175,10 @@ export class ProxycurlClient extends AIFunctionsProvider { ) { return this.ky .get('api/linkedin/company/resolve', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams({ + enrich_profile: 'enrich', + ...opts + }) }) .json() } @@ -2162,7 +2192,7 @@ export class ProxycurlClient extends AIFunctionsProvider { async searchCompanies(opts: proxycurl.CompanySearchEndpointParamsQueryClass) { return this.ky .get('api/v2/search/company', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams(opts) }) .json() } @@ -2176,7 +2206,7 @@ export class ProxycurlClient extends AIFunctionsProvider { async searchPeople(opts: proxycurl.PersonSearchEndpointParamsQueryClass) { return this.ky .get('api/v2/search/person/', { - searchParams: { ...opts } + searchParams: sanitizeSearchParams(opts) }) .json() } diff --git a/legacy/src/url-utils.ts b/legacy/src/url-utils.ts index aabb7498..c4185c09 100644 --- a/legacy/src/url-utils.ts +++ b/legacy/src/url-utils.ts @@ -7,7 +7,7 @@ import QuickLRU from 'quick-lru' import { hashObject } from './utils.js' const protocolAllowList = new Set(['https:', 'http:']) -const normalizedUrlCache = new QuickLRU({ +const normalizedUrlCache = new QuickLRU({ maxSize: 4000 }) @@ -42,11 +42,11 @@ export function isRelativeUrl(url: string): boolean { export function normalizeUrl( url: string, options?: NormalizeUrlOptions -): string | null { - let normalizedUrl: string | null | undefined +): string | undefined { + let normalizedUrl: string | undefined if (!url || isRelativeUrl(url)) { - return null + return undefined } const opts = { @@ -71,18 +71,26 @@ export function normalizeUrl( normalizedUrl = normalizedUrlCache.get(cacheKey) if (normalizedUrl !== undefined) { - return normalizedUrl + if (normalizedUrl) { + return normalizedUrl + } else { + return undefined + } } normalizedUrl = normalizeUrlImpl(url, opts) if (!normalizeUrl) { - normalizedUrl = null + normalizedUrl = '' } } catch { // ignore invalid urls - normalizedUrl = null + normalizedUrl = '' } normalizedUrlCache.set(cacheKey, normalizedUrl!) - return normalizedUrl + if (normalizedUrl) { + return normalizedUrl + } else { + return undefined + } }