feat: improve SerpAPI robustness

old-agentic-v1^2
Travis Fischer 2023-06-16 00:23:04 -07:00
rodzic a9168fc2b9
commit dfc87fed3b
6 zmienionych plików z 90 dodań i 21 usunięć

Wyświetl plik

@ -353,10 +353,24 @@ interface SearchResult extends BaseResponse<GoogleParameters> {
inline_images?: InlineImage[] inline_images?: InlineImage[]
inline_people_also_search_for?: InlinePeopleAlsoSearchFor[] inline_people_also_search_for?: InlinePeopleAlsoSearchFor[]
related_questions?: SearchResultRelatedQuestion[] related_questions?: SearchResultRelatedQuestion[]
organic_results: OrganicResult[] organic_results?: OrganicResult[]
related_searches?: RelatedSearch[] related_searches?: RelatedSearch[]
pagination: Pagination pagination: Pagination
serpapi_pagination: Pagination serpapi_pagination: Pagination
twitter_results?: TwitterResults
}
/**
 * Shape of the optional `twitter_results` section of a search response.
 *
 * All four members are required whenever the section itself is present.
 */
interface TwitterResults {
  /** Heading text of the Twitter section. */
  title: string
  /** URL associated with the section as a whole. */
  link: string
  /** Display form of the URL (presumably the shortened, human-readable variant; confirm against the API docs). */
  displayed_link: string
  /** Individual tweets listed in the section. */
  tweets: Array<Tweet>
}
/**
 * One entry of `TwitterResults.tweets`.
 *
 * All three members are plain strings.
 */
interface Tweet {
// URL of the tweet.
link: string
// Text excerpt of the tweet.
snippet: string
// Publication date, kept as a string (exact format is not visible here; verify before parsing).
published_date: string
} }
interface AnswerBox { interface AnswerBox {
@ -647,16 +661,14 @@ export class SerpAPIClient {
} }
async search(queryOrOpts: string | GoogleParameters) { async search(queryOrOpts: string | GoogleParameters) {
const defaultGoogleParams: Partial<GoogleParameters> = { const defaultGoogleParams: Partial<GoogleParameters> = {}
num: 10
}
const options: GoogleParameters = const options: GoogleParameters =
typeof queryOrOpts === 'string' typeof queryOrOpts === 'string'
? { ...defaultGoogleParams, q: queryOrOpts } ? { ...defaultGoogleParams, q: queryOrOpts }
: queryOrOpts : queryOrOpts
const { timeout, ...rest } = this.params const { timeout, ...rest } = this.params
// console.log(options) // console.log('SerpAPIClient.search', options)
return this.api return this.api
.get('search', { .get('search', {
searchParams: { searchParams: {

Wyświetl plik

@ -92,7 +92,6 @@ export class DiffbotTool extends BaseTask<DiffbotInput, DiffbotOutput> {
}) })
this._logger.info(res, `Diffbot response for url "${ctx.input!.url}"`) this._logger.info(res, `Diffbot response for url "${ctx.input!.url}"`)
console.log(res)
const pickedRes = { const pickedRes = {
type: res.type, type: res.type,

Wyświetl plik

@ -3,6 +3,7 @@ import { z } from 'zod'
import * as types from '@/types' import * as types from '@/types'
import { SerpAPIClient } from '@/services/serpapi' import { SerpAPIClient } from '@/services/serpapi'
import { BaseTask } from '@/task' import { BaseTask } from '@/task'
import { normalizeUrl } from '@/url-utils'
export const SerpAPIInputSchema = z.object({ export const SerpAPIInputSchema = z.object({
query: z.string().describe('search query'), query: z.string().describe('search query'),
@ -33,10 +34,23 @@ export const SerpAPIKnowledgeGraph = z.object({
description: z.string().optional() description: z.string().optional()
}) })
/**
 * Zod schema for a single tweet inside `SerpAPITwitterResults.tweets`.
 *
 * Every field is an optional string, so a tweet that lacks any of them still
 * validates.
 */
export const SerpAPITweet = z.object({
  link: z.optional(z.string()),
  snippet: z.optional(z.string()),
  published_date: z.optional(z.string())
})
/**
 * Zod schema for the `twitter_results` part of the tool output.
 *
 * All fields are optional; `tweets`, when present, is an array validated
 * element-by-element with `SerpAPITweet`.
 *
 * NOTE(review): the `TwitterResults` interface also carries a top-level `link`,
 * which is not declared here — confirm the omission is intentional.
 */
export const SerpAPITwitterResults = z.object({
  title: z.optional(z.string()),
  displayed_link: z.optional(z.string()),
  tweets: z.optional(z.array(SerpAPITweet))
})
export const SerpAPIOutputSchema = z.object({ export const SerpAPIOutputSchema = z.object({
knowledgeGraph: SerpAPIKnowledgeGraph.optional(), knowledge_graph: SerpAPIKnowledgeGraph.optional(),
answerBox: SerpAPIAnswerBox.optional(), answer_box: SerpAPIAnswerBox.optional(),
organicResults: z.array(SerpAPIOrganicSearchResult).optional() organic_results: z.array(SerpAPIOrganicSearchResult).optional(),
twitter_results: SerpAPITwitterResults.optional()
}) })
export type SerpAPIOutput = z.infer<typeof SerpAPIOutputSchema> export type SerpAPIOutput = z.infer<typeof SerpAPIOutputSchema>
@ -77,9 +91,10 @@ export class SerpAPITool extends BaseTask<SerpAPIInput, SerpAPIOutput> {
protected override async _call( protected override async _call(
ctx: types.TaskCallContext<SerpAPIInput> ctx: types.TaskCallContext<SerpAPIInput>
): Promise<SerpAPIOutput> { ): Promise<SerpAPIOutput> {
const { query, numResults = 10 } = ctx.input!
const res = await this._serpapiClient.search({ const res = await this._serpapiClient.search({
q: ctx.input!.query, q: query
num: ctx.input!.numResults
}) })
this._logger.debug( this._logger.debug(
@ -87,10 +102,23 @@ export class SerpAPITool extends BaseTask<SerpAPIInput, SerpAPIOutput> {
`SerpAPI response for query ${JSON.stringify(ctx.input, null, 2)}"` `SerpAPI response for query ${JSON.stringify(ctx.input, null, 2)}"`
) )
const twitterResults = res.twitter_results
? {
...res.twitter_results,
tweets: res.twitter_results.tweets?.map((tweet) => ({
...tweet,
link: normalizeUrl(tweet.link, {
removeQueryParameters: true
})
}))
}
: undefined
return this.outputSchema.parse({ return this.outputSchema.parse({
knowledgeGraph: res.knowledge_graph, knowledge_graph: res.knowledge_graph,
answerBox: res.answer_box, answer_box: res.answer_box,
organicResults: res.organic_results organic_results: res.organic_results?.slice(0, numResults),
twitter_results: twitterResults
}) })
} }
} }

Wyświetl plik

@ -1,5 +1,5 @@
import isRelativeUrl from 'is-relative-url' import isRelativeUrl from 'is-relative-url'
import normalizeUrlImpl from 'normalize-url' import normalizeUrlImpl, { type Options } from 'normalize-url'
import QuickLRU from 'quick-lru' import QuickLRU from 'quick-lru'
// const protocolAllowList = new Set(['https:', 'http:']) // const protocolAllowList = new Set(['https:', 'http:'])
@ -7,7 +7,7 @@ const normalizedUrlCache = new QuickLRU<string, string | null>({
maxSize: 4000 maxSize: 4000
}) })
export function normalizeUrl(url: string): string | null { export function normalizeUrl(url: string, options?: Options): string | null {
let normalizedUrl: string | null | undefined let normalizedUrl: string | null | undefined
try { try {
@ -15,6 +15,7 @@ export function normalizeUrl(url: string): string | null {
return null return null
} }
// TODO: caching doesn't take into account `options`
normalizedUrl = normalizedUrlCache.get(url) normalizedUrl = normalizedUrlCache.get(url)
if (normalizedUrl !== undefined) { if (normalizedUrl !== undefined) {
@ -28,11 +29,12 @@ export function normalizeUrl(url: string): string | null {
forceHttps: false, forceHttps: false,
stripHash: false, stripHash: false,
stripTextFragment: true, stripTextFragment: true,
removeQueryParameters: [/^utm_\w+/i, 'ref'], removeQueryParameters: [/^utm_\w+/i, 'ref', 'ref_src'],
removeTrailingSlash: true, removeTrailingSlash: true,
removeSingleSlash: true, removeSingleSlash: true,
removeExplicitPort: true, removeExplicitPort: true,
sortQueryParameters: true sortQueryParameters: true,
...options
}) })
} catch (err) { } catch (err) {
// ignore invalid urls // ignore invalid urls

Wyświetl plik

@ -106,7 +106,7 @@ export function chunkString(text: string, maxLength: number): string[] {
* @param json - JSON value to stringify * @param json - JSON value to stringify
* @returns stringified value with all double quotes around object keys removed * @returns stringified value with all double quotes around object keys removed
*/ */
export function stringifyForModel(json: types.TaskOutput): string { export function stringifyForModel(json: types.Jsonifiable): string {
const UNIQUE_PREFIX = defaultIDGeneratorFn() const UNIQUE_PREFIX = defaultIDGeneratorFn()
return ( return (
JSON.stringify(json, replacer) JSON.stringify(json, replacer)

Wyświetl plik

@ -1,8 +1,8 @@
import test from 'ava' import test from 'ava'
import { SerpAPIClient } from '@/services/serpapi' import { SerpAPIClient, SerpAPITool } from '@/index'
import { ky } from '../_utils' import { createTestAgenticRuntime, ky } from '../_utils'
test('SerpAPIClient.search - coffee', async (t) => { test('SerpAPIClient.search - coffee', async (t) => {
if (!process.env.SERPAPI_API_KEY) { if (!process.env.SERPAPI_API_KEY) {
@ -17,6 +17,19 @@ test('SerpAPIClient.search - coffee', async (t) => {
t.truthy(result.organic_results) t.truthy(result.organic_results)
}) })
// Live smoke test: a news-style query through the raw client should come back
// with organic results. Passes trivially when no SERPAPI_API_KEY is set.
test('SerpAPIClient.search - news', async (t) => {
  const apiKey = process.env.SERPAPI_API_KEY
  if (!apiKey) {
    return t.pass()
  }

  // Allow up to two minutes for the request.
  const timeoutMs = 2 * 60 * 1000
  t.timeout(timeoutMs)

  const serpapi = new SerpAPIClient({ ky })
  const response = await serpapi.search('OpenAI news')
  // console.log(JSON.stringify(response, null, 2))

  const { organic_results: organicResults } = response
  t.truthy(organicResults)
})
test('SerpAPIClient.search - answer box', async (t) => { test('SerpAPIClient.search - answer box', async (t) => {
if (!process.env.SERPAPI_API_KEY) { if (!process.env.SERPAPI_API_KEY) {
return t.pass() return t.pass()
@ -31,3 +44,18 @@ test('SerpAPIClient.search - answer box', async (t) => {
// console.log(JSON.stringify(result, null, 2)) // console.log(JSON.stringify(result, null, 2))
t.truthy(result.answer_box) t.truthy(result.answer_box)
}) })
// Live smoke test: the same news-style query, this time through SerpAPITool,
// should yield organic results in the tool's output. Passes trivially when no
// SERPAPI_API_KEY is set.
test('SerpAPITool - news', async (t) => {
  const apiKey = process.env.SERPAPI_API_KEY
  if (!apiKey) {
    return t.pass()
  }

  // Allow up to two minutes for the request.
  const timeoutMs = 2 * 60 * 1000
  t.timeout(timeoutMs)

  const runtime = createTestAgenticRuntime()
  const serpapi = new SerpAPIClient({ ky })
  const searchTool = new SerpAPITool({ serpapi, agentic: runtime })

  const output = await searchTool.call({ query: 'OpenAI news' })
  // console.log(JSON.stringify(output, null, 2))

  const { organic_results: organicResults } = output
  t.truthy(organicResults)
})