kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: improve SerpAPI robustness
rodzic
a9168fc2b9
commit
dfc87fed3b
|
@ -353,10 +353,24 @@ interface SearchResult extends BaseResponse<GoogleParameters> {
|
||||||
inline_images?: InlineImage[]
|
inline_images?: InlineImage[]
|
||||||
inline_people_also_search_for?: InlinePeopleAlsoSearchFor[]
|
inline_people_also_search_for?: InlinePeopleAlsoSearchFor[]
|
||||||
related_questions?: SearchResultRelatedQuestion[]
|
related_questions?: SearchResultRelatedQuestion[]
|
||||||
organic_results: OrganicResult[]
|
organic_results?: OrganicResult[]
|
||||||
related_searches?: RelatedSearch[]
|
related_searches?: RelatedSearch[]
|
||||||
pagination: Pagination
|
pagination: Pagination
|
||||||
serpapi_pagination: Pagination
|
serpapi_pagination: Pagination
|
||||||
|
twitter_results?: TwitterResults
|
||||||
|
}
|
||||||
|
|
||||||
|
interface TwitterResults {
|
||||||
|
title: string
|
||||||
|
link: string
|
||||||
|
displayed_link: string
|
||||||
|
tweets: Tweet[]
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Tweet {
|
||||||
|
link: string
|
||||||
|
snippet: string
|
||||||
|
published_date: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface AnswerBox {
|
interface AnswerBox {
|
||||||
|
@ -647,16 +661,14 @@ export class SerpAPIClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
async search(queryOrOpts: string | GoogleParameters) {
|
async search(queryOrOpts: string | GoogleParameters) {
|
||||||
const defaultGoogleParams: Partial<GoogleParameters> = {
|
const defaultGoogleParams: Partial<GoogleParameters> = {}
|
||||||
num: 10
|
|
||||||
}
|
|
||||||
const options: GoogleParameters =
|
const options: GoogleParameters =
|
||||||
typeof queryOrOpts === 'string'
|
typeof queryOrOpts === 'string'
|
||||||
? { ...defaultGoogleParams, q: queryOrOpts }
|
? { ...defaultGoogleParams, q: queryOrOpts }
|
||||||
: queryOrOpts
|
: queryOrOpts
|
||||||
const { timeout, ...rest } = this.params
|
const { timeout, ...rest } = this.params
|
||||||
|
|
||||||
// console.log(options)
|
// console.log('SerpAPIClient.search', options)
|
||||||
return this.api
|
return this.api
|
||||||
.get('search', {
|
.get('search', {
|
||||||
searchParams: {
|
searchParams: {
|
||||||
|
|
|
@ -92,7 +92,6 @@ export class DiffbotTool extends BaseTask<DiffbotInput, DiffbotOutput> {
|
||||||
})
|
})
|
||||||
|
|
||||||
this._logger.info(res, `Diffbot response for url "${ctx.input!.url}"`)
|
this._logger.info(res, `Diffbot response for url "${ctx.input!.url}"`)
|
||||||
console.log(res)
|
|
||||||
|
|
||||||
const pickedRes = {
|
const pickedRes = {
|
||||||
type: res.type,
|
type: res.type,
|
||||||
|
|
|
@ -3,6 +3,7 @@ import { z } from 'zod'
|
||||||
import * as types from '@/types'
|
import * as types from '@/types'
|
||||||
import { SerpAPIClient } from '@/services/serpapi'
|
import { SerpAPIClient } from '@/services/serpapi'
|
||||||
import { BaseTask } from '@/task'
|
import { BaseTask } from '@/task'
|
||||||
|
import { normalizeUrl } from '@/url-utils'
|
||||||
|
|
||||||
export const SerpAPIInputSchema = z.object({
|
export const SerpAPIInputSchema = z.object({
|
||||||
query: z.string().describe('search query'),
|
query: z.string().describe('search query'),
|
||||||
|
@ -33,10 +34,23 @@ export const SerpAPIKnowledgeGraph = z.object({
|
||||||
description: z.string().optional()
|
description: z.string().optional()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
export const SerpAPITweet = z.object({
|
||||||
|
link: z.string().optional(),
|
||||||
|
snippet: z.string().optional(),
|
||||||
|
published_date: z.string().optional()
|
||||||
|
})
|
||||||
|
|
||||||
|
export const SerpAPITwitterResults = z.object({
|
||||||
|
title: z.string().optional(),
|
||||||
|
displayed_link: z.string().optional(),
|
||||||
|
tweets: z.array(SerpAPITweet).optional()
|
||||||
|
})
|
||||||
|
|
||||||
export const SerpAPIOutputSchema = z.object({
|
export const SerpAPIOutputSchema = z.object({
|
||||||
knowledgeGraph: SerpAPIKnowledgeGraph.optional(),
|
knowledge_graph: SerpAPIKnowledgeGraph.optional(),
|
||||||
answerBox: SerpAPIAnswerBox.optional(),
|
answer_box: SerpAPIAnswerBox.optional(),
|
||||||
organicResults: z.array(SerpAPIOrganicSearchResult).optional()
|
organic_results: z.array(SerpAPIOrganicSearchResult).optional(),
|
||||||
|
twitter_results: SerpAPITwitterResults.optional()
|
||||||
})
|
})
|
||||||
export type SerpAPIOutput = z.infer<typeof SerpAPIOutputSchema>
|
export type SerpAPIOutput = z.infer<typeof SerpAPIOutputSchema>
|
||||||
|
|
||||||
|
@ -77,9 +91,10 @@ export class SerpAPITool extends BaseTask<SerpAPIInput, SerpAPIOutput> {
|
||||||
protected override async _call(
|
protected override async _call(
|
||||||
ctx: types.TaskCallContext<SerpAPIInput>
|
ctx: types.TaskCallContext<SerpAPIInput>
|
||||||
): Promise<SerpAPIOutput> {
|
): Promise<SerpAPIOutput> {
|
||||||
|
const { query, numResults = 10 } = ctx.input!
|
||||||
|
|
||||||
const res = await this._serpapiClient.search({
|
const res = await this._serpapiClient.search({
|
||||||
q: ctx.input!.query,
|
q: query
|
||||||
num: ctx.input!.numResults
|
|
||||||
})
|
})
|
||||||
|
|
||||||
this._logger.debug(
|
this._logger.debug(
|
||||||
|
@ -87,10 +102,23 @@ export class SerpAPITool extends BaseTask<SerpAPIInput, SerpAPIOutput> {
|
||||||
`SerpAPI response for query ${JSON.stringify(ctx.input, null, 2)}"`
|
`SerpAPI response for query ${JSON.stringify(ctx.input, null, 2)}"`
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const twitterResults = res.twitter_results
|
||||||
|
? {
|
||||||
|
...res.twitter_results,
|
||||||
|
tweets: res.twitter_results.tweets?.map((tweet) => ({
|
||||||
|
...tweet,
|
||||||
|
link: normalizeUrl(tweet.link, {
|
||||||
|
removeQueryParameters: true
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
: undefined
|
||||||
|
|
||||||
return this.outputSchema.parse({
|
return this.outputSchema.parse({
|
||||||
knowledgeGraph: res.knowledge_graph,
|
knowledge_graph: res.knowledge_graph,
|
||||||
answerBox: res.answer_box,
|
answer_box: res.answer_box,
|
||||||
organicResults: res.organic_results
|
organic_results: res.organic_results?.slice(0, numResults),
|
||||||
|
twitter_results: twitterResults
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import isRelativeUrl from 'is-relative-url'
|
import isRelativeUrl from 'is-relative-url'
|
||||||
import normalizeUrlImpl from 'normalize-url'
|
import normalizeUrlImpl, { type Options } from 'normalize-url'
|
||||||
import QuickLRU from 'quick-lru'
|
import QuickLRU from 'quick-lru'
|
||||||
|
|
||||||
// const protocolAllowList = new Set(['https:', 'http:'])
|
// const protocolAllowList = new Set(['https:', 'http:'])
|
||||||
|
@ -7,7 +7,7 @@ const normalizedUrlCache = new QuickLRU<string, string | null>({
|
||||||
maxSize: 4000
|
maxSize: 4000
|
||||||
})
|
})
|
||||||
|
|
||||||
export function normalizeUrl(url: string): string | null {
|
export function normalizeUrl(url: string, options?: Options): string | null {
|
||||||
let normalizedUrl: string | null | undefined
|
let normalizedUrl: string | null | undefined
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -15,6 +15,7 @@ export function normalizeUrl(url: string): string | null {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: caching doesn't take into account `options`
|
||||||
normalizedUrl = normalizedUrlCache.get(url)
|
normalizedUrl = normalizedUrlCache.get(url)
|
||||||
|
|
||||||
if (normalizedUrl !== undefined) {
|
if (normalizedUrl !== undefined) {
|
||||||
|
@ -28,11 +29,12 @@ export function normalizeUrl(url: string): string | null {
|
||||||
forceHttps: false,
|
forceHttps: false,
|
||||||
stripHash: false,
|
stripHash: false,
|
||||||
stripTextFragment: true,
|
stripTextFragment: true,
|
||||||
removeQueryParameters: [/^utm_\w+/i, 'ref'],
|
removeQueryParameters: [/^utm_\w+/i, 'ref', 'ref_src'],
|
||||||
removeTrailingSlash: true,
|
removeTrailingSlash: true,
|
||||||
removeSingleSlash: true,
|
removeSingleSlash: true,
|
||||||
removeExplicitPort: true,
|
removeExplicitPort: true,
|
||||||
sortQueryParameters: true
|
sortQueryParameters: true,
|
||||||
|
...options
|
||||||
})
|
})
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// ignore invalid urls
|
// ignore invalid urls
|
||||||
|
|
|
@ -106,7 +106,7 @@ export function chunkString(text: string, maxLength: number): string[] {
|
||||||
* @param json - JSON value to stringify
|
* @param json - JSON value to stringify
|
||||||
* @returns stringified value with all double quotes around object keys removed
|
* @returns stringified value with all double quotes around object keys removed
|
||||||
*/
|
*/
|
||||||
export function stringifyForModel(json: types.TaskOutput): string {
|
export function stringifyForModel(json: types.Jsonifiable): string {
|
||||||
const UNIQUE_PREFIX = defaultIDGeneratorFn()
|
const UNIQUE_PREFIX = defaultIDGeneratorFn()
|
||||||
return (
|
return (
|
||||||
JSON.stringify(json, replacer)
|
JSON.stringify(json, replacer)
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import test from 'ava'
|
import test from 'ava'
|
||||||
|
|
||||||
import { SerpAPIClient } from '@/services/serpapi'
|
import { SerpAPIClient, SerpAPITool } from '@/index'
|
||||||
|
|
||||||
import { ky } from '../_utils'
|
import { createTestAgenticRuntime, ky } from '../_utils'
|
||||||
|
|
||||||
test('SerpAPIClient.search - coffee', async (t) => {
|
test('SerpAPIClient.search - coffee', async (t) => {
|
||||||
if (!process.env.SERPAPI_API_KEY) {
|
if (!process.env.SERPAPI_API_KEY) {
|
||||||
|
@ -17,6 +17,19 @@ test('SerpAPIClient.search - coffee', async (t) => {
|
||||||
t.truthy(result.organic_results)
|
t.truthy(result.organic_results)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('SerpAPIClient.search - news', async (t) => {
|
||||||
|
if (!process.env.SERPAPI_API_KEY) {
|
||||||
|
return t.pass()
|
||||||
|
}
|
||||||
|
|
||||||
|
t.timeout(2 * 60 * 1000)
|
||||||
|
const client = new SerpAPIClient({ ky })
|
||||||
|
|
||||||
|
const result = await client.search('OpenAI news')
|
||||||
|
// console.log(JSON.stringify(result, null, 2))
|
||||||
|
t.truthy(result.organic_results)
|
||||||
|
})
|
||||||
|
|
||||||
test('SerpAPIClient.search - answer box', async (t) => {
|
test('SerpAPIClient.search - answer box', async (t) => {
|
||||||
if (!process.env.SERPAPI_API_KEY) {
|
if (!process.env.SERPAPI_API_KEY) {
|
||||||
return t.pass()
|
return t.pass()
|
||||||
|
@ -31,3 +44,18 @@ test('SerpAPIClient.search - answer box', async (t) => {
|
||||||
// console.log(JSON.stringify(result, null, 2))
|
// console.log(JSON.stringify(result, null, 2))
|
||||||
t.truthy(result.answer_box)
|
t.truthy(result.answer_box)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('SerpAPITool - news', async (t) => {
|
||||||
|
if (!process.env.SERPAPI_API_KEY) {
|
||||||
|
return t.pass()
|
||||||
|
}
|
||||||
|
|
||||||
|
t.timeout(2 * 60 * 1000)
|
||||||
|
const agentic = createTestAgenticRuntime()
|
||||||
|
const client = new SerpAPIClient({ ky })
|
||||||
|
const tool = new SerpAPITool({ serpapi: client, agentic })
|
||||||
|
|
||||||
|
const result = await tool.call({ query: 'OpenAI news' })
|
||||||
|
// console.log(JSON.stringify(result, null, 2))
|
||||||
|
t.truthy(result.organic_results)
|
||||||
|
})
|
||||||
|
|
Ładowanie…
Reference in New Issue