kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: improve SerpAPI robustness
rodzic
a9168fc2b9
commit
dfc87fed3b
|
@ -353,10 +353,24 @@ interface SearchResult extends BaseResponse<GoogleParameters> {
|
|||
inline_images?: InlineImage[]
|
||||
inline_people_also_search_for?: InlinePeopleAlsoSearchFor[]
|
||||
related_questions?: SearchResultRelatedQuestion[]
|
||||
organic_results: OrganicResult[]
|
||||
organic_results?: OrganicResult[]
|
||||
related_searches?: RelatedSearch[]
|
||||
pagination: Pagination
|
||||
serpapi_pagination: Pagination
|
||||
twitter_results?: TwitterResults
|
||||
}
|
||||
|
||||
/**
 * Shape of the Twitter section of a search response; it is the type of the
 * optional `twitter_results` field on `SearchResult`.
 * All four fields are declared as required.
 */
interface TwitterResults {
|
||||
title: string
|
||||
link: string
|
||||
// NOTE(review): presumably the human-readable form of `link` — confirm against the SerpAPI docs.
displayed_link: string
|
||||
// The individual tweets, each described by the `Tweet` interface.
tweets: Tweet[]
|
||||
}
|
||||
|
||||
/**
 * One element of `TwitterResults.tweets`. All three fields are required strings.
 */
interface Tweet {
|
||||
link: string
|
||||
snippet: string
|
||||
// Typed as a plain string; no date parsing is declared at this level.
published_date: string
|
||||
}
|
||||
|
||||
interface AnswerBox {
|
||||
|
@ -647,16 +661,14 @@ export class SerpAPIClient {
|
|||
}
|
||||
|
||||
async search(queryOrOpts: string | GoogleParameters) {
|
||||
const defaultGoogleParams: Partial<GoogleParameters> = {
|
||||
num: 10
|
||||
}
|
||||
const defaultGoogleParams: Partial<GoogleParameters> = {}
|
||||
const options: GoogleParameters =
|
||||
typeof queryOrOpts === 'string'
|
||||
? { ...defaultGoogleParams, q: queryOrOpts }
|
||||
: queryOrOpts
|
||||
const { timeout, ...rest } = this.params
|
||||
|
||||
// console.log(options)
|
||||
// console.log('SerpAPIClient.search', options)
|
||||
return this.api
|
||||
.get('search', {
|
||||
searchParams: {
|
||||
|
|
|
@ -92,7 +92,6 @@ export class DiffbotTool extends BaseTask<DiffbotInput, DiffbotOutput> {
|
|||
})
|
||||
|
||||
this._logger.info(res, `Diffbot response for url "${ctx.input!.url}"`)
|
||||
console.log(res)
|
||||
|
||||
const pickedRes = {
|
||||
type: res.type,
|
||||
|
|
|
@ -3,6 +3,7 @@ import { z } from 'zod'
|
|||
import * as types from '@/types'
|
||||
import { SerpAPIClient } from '@/services/serpapi'
|
||||
import { BaseTask } from '@/task'
|
||||
import { normalizeUrl } from '@/url-utils'
|
||||
|
||||
export const SerpAPIInputSchema = z.object({
|
||||
query: z.string().describe('search query'),
|
||||
|
@ -33,10 +34,23 @@ export const SerpAPIKnowledgeGraph = z.object({
|
|||
description: z.string().optional()
|
||||
})
|
||||
|
||||
/**
 * Zod schema for a single tweet in the tool output.
 * Every field is an optional string, so a tweet missing any of them still
 * passes validation.
 */
export const SerpAPITweet = z.object({
|
||||
link: z.string().optional(),
|
||||
snippet: z.string().optional(),
|
||||
published_date: z.string().optional()
|
||||
})
|
||||
|
||||
/**
 * Zod schema for the Twitter section of the tool output; used as the optional
 * `twitter_results` field of `SerpAPIOutputSchema`. All fields are optional.
 *
 * NOTE(review): no `link` key is declared here, although the `TwitterResults`
 * interface has one. If zod's default unknown-key handling applies, that
 * field would be dropped when the output is parsed — confirm this is intended.
 */
export const SerpAPITwitterResults = z.object({
|
||||
title: z.string().optional(),
|
||||
displayed_link: z.string().optional(),
|
||||
// Each entry is validated with `SerpAPITweet`.
tweets: z.array(SerpAPITweet).optional()
|
||||
})
|
||||
|
||||
export const SerpAPIOutputSchema = z.object({
|
||||
knowledgeGraph: SerpAPIKnowledgeGraph.optional(),
|
||||
answerBox: SerpAPIAnswerBox.optional(),
|
||||
organicResults: z.array(SerpAPIOrganicSearchResult).optional()
|
||||
knowledge_graph: SerpAPIKnowledgeGraph.optional(),
|
||||
answer_box: SerpAPIAnswerBox.optional(),
|
||||
organic_results: z.array(SerpAPIOrganicSearchResult).optional(),
|
||||
twitter_results: SerpAPITwitterResults.optional()
|
||||
})
|
||||
export type SerpAPIOutput = z.infer<typeof SerpAPIOutputSchema>
|
||||
|
||||
|
@ -77,9 +91,10 @@ export class SerpAPITool extends BaseTask<SerpAPIInput, SerpAPIOutput> {
|
|||
protected override async _call(
|
||||
ctx: types.TaskCallContext<SerpAPIInput>
|
||||
): Promise<SerpAPIOutput> {
|
||||
const { query, numResults = 10 } = ctx.input!
|
||||
|
||||
const res = await this._serpapiClient.search({
|
||||
q: ctx.input!.query,
|
||||
num: ctx.input!.numResults
|
||||
q: query
|
||||
})
|
||||
|
||||
this._logger.debug(
|
||||
|
@ -87,10 +102,23 @@ export class SerpAPITool extends BaseTask<SerpAPIInput, SerpAPIOutput> {
|
|||
`SerpAPI response for query ${JSON.stringify(ctx.input, null, 2)}"`
|
||||
)
|
||||
|
||||
const twitterResults = res.twitter_results
|
||||
? {
|
||||
...res.twitter_results,
|
||||
tweets: res.twitter_results.tweets?.map((tweet) => ({
|
||||
...tweet,
|
||||
link: normalizeUrl(tweet.link, {
|
||||
removeQueryParameters: true
|
||||
})
|
||||
}))
|
||||
}
|
||||
: undefined
|
||||
|
||||
return this.outputSchema.parse({
|
||||
knowledgeGraph: res.knowledge_graph,
|
||||
answerBox: res.answer_box,
|
||||
organicResults: res.organic_results
|
||||
knowledge_graph: res.knowledge_graph,
|
||||
answer_box: res.answer_box,
|
||||
organic_results: res.organic_results?.slice(0, numResults),
|
||||
twitter_results: twitterResults
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import isRelativeUrl from 'is-relative-url'
|
||||
import normalizeUrlImpl from 'normalize-url'
|
||||
import normalizeUrlImpl, { type Options } from 'normalize-url'
|
||||
import QuickLRU from 'quick-lru'
|
||||
|
||||
// const protocolAllowList = new Set(['https:', 'http:'])
|
||||
|
@ -7,7 +7,7 @@ const normalizedUrlCache = new QuickLRU<string, string | null>({
|
|||
maxSize: 4000
|
||||
})
|
||||
|
||||
export function normalizeUrl(url: string): string | null {
|
||||
export function normalizeUrl(url: string, options?: Options): string | null {
|
||||
let normalizedUrl: string | null | undefined
|
||||
|
||||
try {
|
||||
|
@ -15,6 +15,7 @@ export function normalizeUrl(url: string): string | null {
|
|||
return null
|
||||
}
|
||||
|
||||
// TODO: caching doesn't take into account `options`
|
||||
normalizedUrl = normalizedUrlCache.get(url)
|
||||
|
||||
if (normalizedUrl !== undefined) {
|
||||
|
@ -28,11 +29,12 @@ export function normalizeUrl(url: string): string | null {
|
|||
forceHttps: false,
|
||||
stripHash: false,
|
||||
stripTextFragment: true,
|
||||
removeQueryParameters: [/^utm_\w+/i, 'ref'],
|
||||
removeQueryParameters: [/^utm_\w+/i, 'ref', 'ref_src'],
|
||||
removeTrailingSlash: true,
|
||||
removeSingleSlash: true,
|
||||
removeExplicitPort: true,
|
||||
sortQueryParameters: true
|
||||
sortQueryParameters: true,
|
||||
...options
|
||||
})
|
||||
} catch (err) {
|
||||
// ignore invalid urls
|
||||
|
|
|
@ -106,7 +106,7 @@ export function chunkString(text: string, maxLength: number): string[] {
|
|||
* @param json - JSON value to stringify
|
||||
* @returns stringified value with all double quotes around object keys removed
|
||||
*/
|
||||
export function stringifyForModel(json: types.TaskOutput): string {
|
||||
export function stringifyForModel(json: types.Jsonifiable): string {
|
||||
const UNIQUE_PREFIX = defaultIDGeneratorFn()
|
||||
return (
|
||||
JSON.stringify(json, replacer)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import test from 'ava'
|
||||
|
||||
import { SerpAPIClient } from '@/services/serpapi'
|
||||
import { SerpAPIClient, SerpAPITool } from '@/index'
|
||||
|
||||
import { ky } from '../_utils'
|
||||
import { createTestAgenticRuntime, ky } from '../_utils'
|
||||
|
||||
test('SerpAPIClient.search - coffee', async (t) => {
|
||||
if (!process.env.SERPAPI_API_KEY) {
|
||||
|
@ -17,6 +17,19 @@ test('SerpAPIClient.search - coffee', async (t) => {
|
|||
t.truthy(result.organic_results)
|
||||
})
|
||||
|
||||
// Live test: runs `SerpAPIClient.search` with the query 'OpenAI news' and
// checks that the response carries `organic_results`.
test('SerpAPIClient.search - news', async (t) => {
|
||||
// Without SERPAPI_API_KEY the test passes immediately and never builds a client.
if (!process.env.SERPAPI_API_KEY) {
|
||||
return t.pass()
|
||||
}
|
||||
|
||||
// Timeout of two minutes, expressed in milliseconds.
t.timeout(2 * 60 * 1000)
|
||||
const client = new SerpAPIClient({ ky })
|
||||
|
||||
const result = await client.search('OpenAI news')
|
||||
// console.log(JSON.stringify(result, null, 2))
|
||||
// Only presence is asserted; the contents of the results are not inspected.
t.truthy(result.organic_results)
|
||||
})
|
||||
|
||||
test('SerpAPIClient.search - answer box', async (t) => {
|
||||
if (!process.env.SERPAPI_API_KEY) {
|
||||
return t.pass()
|
||||
|
@ -31,3 +44,18 @@ test('SerpAPIClient.search - answer box', async (t) => {
|
|||
// console.log(JSON.stringify(result, null, 2))
|
||||
t.truthy(result.answer_box)
|
||||
})
|
||||
|
||||
// Live test: calls `SerpAPITool` with the query 'OpenAI news' and checks
// that the tool output carries `organic_results`.
test('SerpAPITool - news', async (t) => {
|
||||
// Without SERPAPI_API_KEY the test passes immediately and never builds the tool.
if (!process.env.SERPAPI_API_KEY) {
|
||||
return t.pass()
|
||||
}
|
||||
|
||||
// Timeout of two minutes, expressed in milliseconds.
t.timeout(2 * 60 * 1000)
|
||||
const agentic = createTestAgenticRuntime()
|
||||
const client = new SerpAPIClient({ ky })
|
||||
// The tool is given the client explicitly, together with the test runtime.
const tool = new SerpAPITool({ serpapi: client, agentic })
|
||||
|
||||
const result = await tool.call({ query: 'OpenAI news' })
|
||||
// console.log(JSON.stringify(result, null, 2))
|
||||
// Only presence is asserted; the contents of the results are not inspected.
t.truthy(result.organic_results)
|
||||
})
|
||||
|
|
Ładowanie…
Reference in New Issue