From dfc87fed3ba622e53b4aaee2da123ef09a4fe195 Mon Sep 17 00:00:00 2001 From: Travis Fischer Date: Fri, 16 Jun 2023 00:23:04 -0700 Subject: [PATCH] feat: improve SerpAPI robustness --- src/services/serpapi.ts | 22 ++++++++++++++---- src/tools/diffbot.ts | 1 - src/tools/serpapi.ts | 44 ++++++++++++++++++++++++++++------- src/url-utils.ts | 10 ++++---- src/utils.ts | 2 +- test/services/serpapi.test.ts | 32 +++++++++++++++++++++++-- 6 files changed, 90 insertions(+), 21 deletions(-) diff --git a/src/services/serpapi.ts b/src/services/serpapi.ts index ccf9985..a74624f 100644 --- a/src/services/serpapi.ts +++ b/src/services/serpapi.ts @@ -353,10 +353,24 @@ interface SearchResult extends BaseResponse { inline_images?: InlineImage[] inline_people_also_search_for?: InlinePeopleAlsoSearchFor[] related_questions?: SearchResultRelatedQuestion[] - organic_results: OrganicResult[] + organic_results?: OrganicResult[] related_searches?: RelatedSearch[] pagination: Pagination serpapi_pagination: Pagination + twitter_results?: TwitterResults +} + +interface TwitterResults { + title: string + link: string + displayed_link: string + tweets: Tweet[] +} + +interface Tweet { + link: string + snippet: string + published_date: string } interface AnswerBox { @@ -647,16 +661,14 @@ export class SerpAPIClient { } async search(queryOrOpts: string | GoogleParameters) { - const defaultGoogleParams: Partial = { - num: 10 - } + const defaultGoogleParams: Partial = {} const options: GoogleParameters = typeof queryOrOpts === 'string' ? { ...defaultGoogleParams, q: queryOrOpts } : queryOrOpts const { timeout, ...rest } = this.params - // console.log(options) + // console.log('SerpAPIClient.search', options) return this.api .get('search', { searchParams: { diff --git a/src/tools/diffbot.ts b/src/tools/diffbot.ts index 262e072..a110b1c 100644 --- a/src/tools/diffbot.ts +++ b/src/tools/diffbot.ts @@ -92,7 +92,6 @@ export class DiffbotTool extends BaseTask { }) this._logger.info(res, `Diffbot response for url "${ctx.input!.url}"`) - console.log(res) const pickedRes = { type: res.type, diff --git a/src/tools/serpapi.ts b/src/tools/serpapi.ts index 22124f5..caf1d0b 100644 --- a/src/tools/serpapi.ts +++ b/src/tools/serpapi.ts @@ -3,6 +3,7 @@ import { z } from 'zod' import * as types from '@/types' import { SerpAPIClient } from '@/services/serpapi' import { BaseTask } from '@/task' +import { normalizeUrl } from '@/url-utils' export const SerpAPIInputSchema = z.object({ query: z.string().describe('search query'), @@ -33,10 +34,23 @@ export const SerpAPIKnowledgeGraph = z.object({ description: z.string().optional() }) +export const SerpAPITweet = z.object({ + link: z.string().optional(), + snippet: z.string().optional(), + published_date: z.string().optional() +}) + +export const SerpAPITwitterResults = z.object({ + title: z.string().optional(), + displayed_link: z.string().optional(), + tweets: z.array(SerpAPITweet).optional() +}) + export const SerpAPIOutputSchema = z.object({ - knowledgeGraph: SerpAPIKnowledgeGraph.optional(), - answerBox: SerpAPIAnswerBox.optional(), - organicResults: z.array(SerpAPIOrganicSearchResult).optional() + knowledge_graph: SerpAPIKnowledgeGraph.optional(), + answer_box: SerpAPIAnswerBox.optional(), + organic_results: z.array(SerpAPIOrganicSearchResult).optional(), + twitter_results: SerpAPITwitterResults.optional() }) export type SerpAPIOutput = z.infer @@ -77,9 +91,10 @@ export class SerpAPITool extends BaseTask { protected override async _call( ctx: types.TaskCallContext ): Promise { + const { query, numResults = 10 } = ctx.input! + const res = await this._serpapiClient.search({ - q: ctx.input!.query, - num: ctx.input!.numResults + q: query }) this._logger.debug( @@ -87,10 +102,23 @@ export class SerpAPITool extends BaseTask { `SerpAPI response for query ${JSON.stringify(ctx.input, null, 2)}"` ) + const twitterResults = res.twitter_results + ? { + ...res.twitter_results, + tweets: res.twitter_results.tweets?.map((tweet) => ({ + ...tweet, + link: normalizeUrl(tweet.link, { + removeQueryParameters: true + }) + })) + } + : undefined + return this.outputSchema.parse({ - knowledgeGraph: res.knowledge_graph, - answerBox: res.answer_box, - organicResults: res.organic_results + knowledge_graph: res.knowledge_graph, + answer_box: res.answer_box, + organic_results: res.organic_results?.slice(0, numResults), + twitter_results: twitterResults }) } } diff --git a/src/url-utils.ts b/src/url-utils.ts index ee54586..62b93ad 100644 --- a/src/url-utils.ts +++ b/src/url-utils.ts @@ -1,5 +1,5 @@ import isRelativeUrl from 'is-relative-url' -import normalizeUrlImpl from 'normalize-url' +import normalizeUrlImpl, { type Options } from 'normalize-url' import QuickLRU from 'quick-lru' // const protocolAllowList = new Set(['https:', 'http:']) @@ -7,7 +7,7 @@ const normalizedUrlCache = new QuickLRU({ maxSize: 4000 }) -export function normalizeUrl(url: string): string | null { +export function normalizeUrl(url: string, options?: Options): string | null { let normalizedUrl: string | null | undefined try { @@ -15,6 +15,7 @@ export function normalizeUrl(url: string): string | null { return null } + // TODO: caching doesn't take into account `options` normalizedUrl = normalizedUrlCache.get(url) if (normalizedUrl !== undefined) { @@ -28,11 +29,12 @@ export function normalizeUrl(url: string): string | null { forceHttps: false, stripHash: false, stripTextFragment: true, - removeQueryParameters: [/^utm_\w+/i, 'ref'], + removeQueryParameters: [/^utm_\w+/i, 'ref', 'ref_src'], removeTrailingSlash: true, removeSingleSlash: true, removeExplicitPort: true, - sortQueryParameters: true + sortQueryParameters: true, + ...options }) } catch (err) { // ignore invalid urls diff --git a/src/utils.ts b/src/utils.ts index 819a2e8..879432d 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -106,7 +106,7 @@ export function chunkString(text: string, maxLength: number): string[] { * @param json - JSON value to stringify * @returns stringified value with all double quotes around object keys removed */ -export function stringifyForModel(json: types.TaskOutput): string { +export function stringifyForModel(json: types.Jsonifiable): string { const UNIQUE_PREFIX = defaultIDGeneratorFn() return ( JSON.stringify(json, replacer) diff --git a/test/services/serpapi.test.ts b/test/services/serpapi.test.ts index 110cfd8..5977222 100644 --- a/test/services/serpapi.test.ts +++ b/test/services/serpapi.test.ts @@ -1,8 +1,8 @@ import test from 'ava' -import { SerpAPIClient } from '@/services/serpapi' +import { SerpAPIClient, SerpAPITool } from '@/index' -import { ky } from '../_utils' +import { createTestAgenticRuntime, ky } from '../_utils' test('SerpAPIClient.search - coffee', async (t) => { if (!process.env.SERPAPI_API_KEY) { @@ -17,6 +17,19 @@ test('SerpAPIClient.search - coffee', async (t) => { t.truthy(result.organic_results) }) +test('SerpAPIClient.search - news', async (t) => { + if (!process.env.SERPAPI_API_KEY) { + return t.pass() + } + + t.timeout(2 * 60 * 1000) + const client = new SerpAPIClient({ ky }) + + const result = await client.search('OpenAI news') + // console.log(JSON.stringify(result, null, 2)) + t.truthy(result.organic_results) +}) + test('SerpAPIClient.search - answer box', async (t) => { if (!process.env.SERPAPI_API_KEY) { return t.pass() @@ -31,3 +44,18 @@ test('SerpAPIClient.search - answer box', async (t) => { // console.log(JSON.stringify(result, null, 2)) t.truthy(result.answer_box) }) + +test('SerpAPITool - news', async (t) => { + if (!process.env.SERPAPI_API_KEY) { + return t.pass() + } + + t.timeout(2 * 60 * 1000) + const agentic = createTestAgenticRuntime() + const client = new SerpAPIClient({ ky }) + const tool = new SerpAPITool({ serpapi: client, agentic }) + + const result = await tool.call({ query: 'OpenAI news' }) + // console.log(JSON.stringify(result, null, 2)) + t.truthy(result.organic_results) +})