Merge pull request #643 from transitive-bullshit/agentic

feature/docs
Travis Fischer 2024-06-07 01:31:05 -05:00 zatwierdzone przez GitHub
commit 88026ed157
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
93 zmienionych plików z 39820 dodań i 15 usunięć

10
.editorconfig 100644
Wyświetl plik

@ -0,0 +1,10 @@
root = true
[*]
indent_style = space
indent_size = 2
tab_width = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

6
.env.example 100644
Wyświetl plik

@ -0,0 +1,6 @@
# ------------------------------------------------------------------------------
# This is an example .env file.
#
# All of these environment vars must be defined either in your environment or in
# a local .env file in order to run this project.
# ------------------------------------------------------------------------------

8
.eslintrc.json 100644
Wyświetl plik

@ -0,0 +1,8 @@
{
"root": true,
"extends": ["@fisch0920/eslint-config/node"],
"rules": {
"unicorn/no-static-only-class": "off",
"@typescript-eslint/naming-convention": "off"
}
}

1
.github/funding.yml vendored 100644
Wyświetl plik

@ -0,0 +1 @@
github: [transitive-bullshit]

50
.github/workflows/main.yml vendored 100644
Wyświetl plik

@ -0,0 +1,50 @@
name: CI
on: [push, pull_request]
jobs:
test:
name: Test Node.js ${{ matrix.node-version }}
runs-on: ubuntu-latest
strategy:
fail-fast: true
matrix:
node-version:
- 20
- 21
- 22
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- name: Install pnpm
uses: pnpm/action-setup@v3
id: pnpm-install
with:
version: 9.1.4
run_install: false
- name: Get pnpm store directory
shell: bash
run: |
echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
- name: Setup pnpm cache
uses: actions/cache@v4
with:
path: ${{ env.STORE_PATH }}
key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-pnpm-store-
- name: Install dependencies
run: pnpm install --frozen-lockfile
- name: Run test
run: pnpm run test

21
.gitignore vendored
Wyświetl plik

@ -1,10 +1,7 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
*.swp
.idea
# dependencies
/node_modules
node_modules
/.pnp
.pnp.js
@ -12,11 +9,12 @@
/coverage
# next.js
/.next/
/out/
.next/
out/
# production
/build
build/
dist/
# misc
.DS_Store
@ -38,13 +36,6 @@ yarn-error.log*
*.tsbuildinfo
next-env.d.ts
# local env files
.env
.env.local
.env.build
.env.development.local
.env.test.local
.env.production.local
# data dumps
out/
old/

Wyświetl plik

@ -0,0 +1 @@
npm run precommit

2
.npmrc 100644
Wyświetl plik

@ -0,0 +1,2 @@
enable-pre-post-scripts=true
package-manager-strict=false

11
.prettierrc 100644
Wyświetl plik

@ -0,0 +1,11 @@
{
"singleQuote": true,
"jsxSingleQuote": true,
"semi": false,
"useTabs": false,
"tabWidth": 2,
"bracketSpacing": true,
"bracketSameLine": false,
"arrowParens": "always",
"trailingComma": "none"
}

128
bin/scratch.ts 100644
Wyświetl plik

@ -0,0 +1,128 @@
#!/usr/bin/env node
import 'dotenv/config'
import restoreCursor from 'restore-cursor'
// import { SearxngClient } from '../src/services/searxng-client.js'
// import { ClearbitClient } from '../src/index.js'
// import { ProxycurlClient } from '../src/services/proxycurl-client.js'
// import { WikipediaClient } from '../src/index.js'
// import { PerigonClient } from '../src/index.js'
// import { FirecrawlClient } from '../src/index.js'
// import { ExaClient } from '../src/index.js'
// import { DiffbotClient } from '../src/index.js'
// import { WolframAlphaClient } from '../src/index.js'
// import {
// createTwitterV2Client,
// TwitterClient
// } from '../src/services/twitter/index.js'
// import { MidjourneyClient } from '../src/index.js'
// import { BingClient } from '../src/index.js'
import { TavilyClient } from '../src/index.js'
/**
 * Scratch pad for manually exercising individual clients.
 *
 * Only the Tavily search at the bottom is live. The commented-out snippets are
 * kept as ready-made invocations for the other clients; uncomment the matching
 * import at the top of the file to use one.
 */
async function main() {
  restoreCursor()

  // const clearbit = new ClearbitClient()
  // const res = await clearbit.companyEnrichment({
  //   domain: 'https://clay.com'
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const proxycurl = new ProxycurlClient()
  // const res = await proxycurl.getLinkedInPerson({
  //   linkedin_profile_url: 'https://linkedin.com/in/fisch2'
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const wikipedia = new WikipediaClient()
  // const res = await wikipedia.getPageSummary({
  //   // title: 'Naruto_(TV_series)'
  //   title: 'SpaceX'
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const searxng = new SearxngClient()
  // const res = await searxng.search({
  //   query: 'golden gate bridge',
  //   engines: ['reddit']
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const perigon = new PerigonClient()
  // const res = await perigon.searchArticles({
  //   q: 'AI agents AND startup',
  //   sourceGroup: 'top50tech'
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const firecrawl = new FirecrawlClient()
  // const res = await firecrawl.scrapeUrl({
  //   url: 'https://www.bbc.com/news/articles/cp4475gwny1o'
  //   // url: 'https://www.theguardian.com/technology/article/2024/jun/04/openai-google-ai-risks-letter'
  //   // url: 'https://www.firecrawl.dev'
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const exa = new ExaClient()
  // const res = await exa.search({
  //   query: 'OpenAI',
  //   contents: { text: true }
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const diffbot = new DiffbotClient()
  // // const res = await diffbot.analyzeUrl({
  // //   url: 'https://www.bbc.com/news/articles/cp4475gwny1o'
  // // })
  // const res = await diffbot.enhanceEntity({
  //   type: 'Person',
  //   name: 'Kevin Raheja'
  // })
  // console.log(JSON.stringify(res, null, 2))

  // const wolfram = new WolframAlphaClient()
  // const res = await wolfram.ask({
  //   input: 'population of new york city'
  // })
  // console.log(res)

  // const client = await createTwitterV2Client({
  //   scopes: ['tweet.read', 'users.read', 'offline.access']
  // })
  // const twitter = new TwitterClient({ client })
  // // const res = await twitter.findUserByUsername({ username: 'transitive_bs' })
  // const res = await twitter.searchRecentTweets({
  //   query: 'open source AI agents'
  // })
  // console.log(res)

  // const midjourney = new MidjourneyClient()
  // const res = await midjourney.imagine(
  //   'tiny lil baby kittens playing with an inquisitive AI robot, kawaii, anime'
  // )
  // console.log(JSON.stringify(res, null, 2))

  // const bing = new BingClient()
  // const res = await bing.search({
  //   q: 'world cup 2024 freestyle wrestling news'
  // })
  // console.log(JSON.stringify(res, null, 2))

  // Live snippet: run a Tavily search and pretty-print the response.
  const tavilyClient = new TavilyClient()
  const searchResponse = await tavilyClient.search({
    query: 'when do experts predict that OpenAI will release GPT-5?',
    include_answer: true
  })
  const prettyJson = JSON.stringify(searchResponse, null, 2)
  console.log(prettyJson)
}
// Entry point: run the scratch pad, and on any failure log it and exit with a
// non-zero status code.
try {
  await main()
} catch (error) {
  console.error('error', error)
  process.exit(1)
}

Wyświetl plik

@ -0,0 +1,26 @@
#!/usr/bin/env node
import 'dotenv/config'
import { openai } from '@ai-sdk/openai'
import { Browserbase, BrowserbaseAISDK } from '@browserbasehq/sdk'
import { generateText } from 'ai'
/**
 * Example: hands the Browserbase AI SDK tool to `generateText`, forces a tool
 * call, and prints the first tool result.
 */
async function main() {
  const browserTool = BrowserbaseAISDK(new Browserbase(), {
    textContent: true
  })

  // Show the parameter schema the model will see for this tool.
  console.log(browserTool.parameters)

  const { toolResults } = await generateText({
    model: openai('gpt-4o'),
    tools: { browserTool },
    toolChoice: 'required',
    temperature: 0,
    system: 'You are a helpful assistant. Be as concise as possible.',
    prompt: 'What is the weather in San Francisco?'
  })

  console.log(toolResults[0])
}
await main()

Wyświetl plik

@ -0,0 +1,24 @@
#!/usr/bin/env node
import 'dotenv/config'
import { WeatherClient } from '@agentic/stdlib'
import { createAISDKTools } from '@agentic/stdlib/ai-sdk'
import { openai } from '@ai-sdk/openai'
import { generateText } from 'ai'
/**
 * Example: exposes `WeatherClient` to the Vercel AI SDK through
 * `createAISDKTools`, forces a tool call, and prints the first tool result.
 */
async function main() {
  const weatherClient = new WeatherClient()

  const { toolResults } = await generateText({
    model: openai('gpt-4o'),
    tools: createAISDKTools(weatherClient),
    toolChoice: 'required',
    temperature: 0,
    system: 'You are a helpful assistant. Be as concise as possible.',
    prompt: 'What is the weather in San Francisco?'
  })

  console.log(toolResults[0])
}
await main()

Wyświetl plik

@ -0,0 +1,28 @@
#!/usr/bin/env node
import 'dotenv/config'
import { DiffbotClient, SearchAndCrawl, SerpAPIClient } from '@agentic/stdlib'
import { createDexterFunctions } from '@agentic/stdlib/dexter'
import { ChatModel, createAIRunner } from '@dexaai/dexter'
/**
 * Example: builds a `SearchAndCrawl` compound tool from SerpAPI and Diffbot,
 * exposes it to a Dexter AI runner, and prints the runner's summary of the
 * latest news on a fixed topic.
 */
async function main() {
  // SerpAPI is constructed first, then Diffbot (object literal order).
  const searchAndCrawl = new SearchAndCrawl({
    serpapi: new SerpAPIClient(),
    diffbot: new DiffbotClient()
  })

  const runner = createAIRunner({
    chatModel: new ChatModel({
      params: { model: 'gpt-4o', temperature: 0 }
      // debug: true
    }),
    functions: createDexterFunctions(searchAndCrawl),
    systemMessage:
      'You are a McKinsey analyst who is an expert at writing executive summaries. Always cite your sources and respond using Markdown.'
  })

  const topic = 'the 2024 olympics'
  const summary = await runner(`Summarize the latest news on ${topic}`)
  console.log(summary)
}
await main()

Wyświetl plik

@ -0,0 +1,23 @@
#!/usr/bin/env node
import 'dotenv/config'
import { createDexterFunctions } from '@agentic/stdlib/dexter'
import { e2b } from '@agentic/stdlib/e2b'
import { ChatModel, createAIRunner } from '@dexaai/dexter'
/**
 * Example: gives a Dexter AI runner the `e2b` tool functions (with chat model
 * debugging enabled) and prints its answer to a data-visualization prompt.
 */
async function main() {
  const prompt =
    'Visualize a distribution of height of men based on the latest data you know. Also print the median value.'

  const runner = createAIRunner({
    chatModel: new ChatModel({
      params: { model: 'gpt-4o', temperature: 0 },
      debug: true
    }),
    functions: createDexterFunctions(e2b)
  })

  const answer = await runner(prompt)
  console.log(answer)
}
await main()

Wyświetl plik

@ -0,0 +1,31 @@
#!/usr/bin/env node
import 'dotenv/config'
import { PerigonClient, SerperClient } from '@agentic/stdlib'
import { createDexterFunctions } from '@agentic/stdlib/dexter'
import { ChatModel, createAIRunner } from '@dexaai/dexter'
/**
 * Example: combines a single picked Perigon function
 * (`search_news_stories`) with the whole Serper client as Dexter functions,
 * then prints the runner's summary of recent election news.
 */
async function main() {
  const perigon = new PerigonClient()
  const serper = new SerperClient()
  const prompt =
    'Summarize the latest news stories about the upcoming US election.'

  const runner = createAIRunner({
    chatModel: new ChatModel({
      params: { model: 'gpt-4o', temperature: 0 }
      // debug: true
    }),
    // Only one Perigon function is exposed; Serper is passed as a provider.
    functions: createDexterFunctions(
      perigon.functions.pick('search_news_stories'),
      serper
    ),
    systemMessage:
      'You are a helpful assistant. Be as concise as possible. Respond in markdown. Always cite your sources.'
  })

  const summary = await runner(prompt)
  console.log(summary)
}
await main()

Wyświetl plik

@ -0,0 +1,24 @@
#!/usr/bin/env node
import 'dotenv/config'
import { WeatherClient } from '@agentic/stdlib'
import { createDexterFunctions } from '@agentic/stdlib/dexter'
import { ChatModel, createAIRunner } from '@dexaai/dexter'
/**
 * Example: exposes `WeatherClient` to a Dexter AI runner through
 * `createDexterFunctions` and prints the runner's answer to a weather
 * question.
 */
async function main() {
  const weatherClient = new WeatherClient()

  const runner = createAIRunner({
    chatModel: new ChatModel({
      params: { model: 'gpt-4o', temperature: 0 }
      // debug: true
    }),
    functions: createDexterFunctions(weatherClient),
    systemMessage: 'You are a helpful assistant. Be as concise as possible.'
  })

  const answer = await runner('What is the weather in San Francisco?')
  console.log(answer)
}
await main()

Wyświetl plik

@ -0,0 +1,36 @@
#!/usr/bin/env node
import 'dotenv/config'
import { WeatherClient } from '@agentic/stdlib'
import { createGenkitTools } from '@agentic/stdlib/genkit'
import { generate } from '@genkit-ai/ai'
import { configureGenkit } from '@genkit-ai/core'
import { gpt4o, openAI } from 'genkitx-openai'
/**
 * Example: configures Genkit with the OpenAI plugin, exposes `WeatherClient`
 * through `createGenkitTools`, and prints the result of a weather prompt.
 */
async function main() {
  const weatherClient = new WeatherClient()

  configureGenkit({ plugins: [openAI()] })

  // The system instruction is supplied as the first history entry.
  const generation = await generate({
    model: gpt4o,
    tools: createGenkitTools(weatherClient),
    history: [
      {
        role: 'system',
        content: [
          { text: 'You are a helpful assistant. Be as concise as possible.' }
        ]
      }
    ],
    prompt: 'What is the weather in San Francisco?'
  })

  console.log(generation)
}
await main()

Wyświetl plik

@ -0,0 +1,38 @@
#!/usr/bin/env node
import 'dotenv/config'
import { WeatherClient } from '@agentic/stdlib'
import { createLangChainTools } from '@agentic/stdlib/langchain'
import { ChatPromptTemplate } from '@langchain/core/prompts'
import { ChatOpenAI } from '@langchain/openai'
import { AgentExecutor, createToolCallingAgent } from 'langchain/agents'
/**
 * Example: converts `WeatherClient` into LangChain tools, wires them into a
 * tool-calling agent and executor, and prints the agent's output for a
 * weather question.
 */
async function main() {
  const weatherClient = new WeatherClient()
  const tools = createLangChainTools(weatherClient)

  const agent = createToolCallingAgent({
    llm: new ChatOpenAI({ model: 'gpt-4o', temperature: 0 }),
    tools,
    prompt: ChatPromptTemplate.fromMessages([
      ['system', 'You are a helpful assistant. Be as concise as possible.'],
      ['placeholder', '{chat_history}'],
      ['human', '{input}'],
      ['placeholder', '{agent_scratchpad}']
    ])
  })

  // The same tool list is passed to both the agent and its executor.
  const executor = new AgentExecutor({
    agent,
    tools
    // verbose: true
  })

  const { output } = await executor.invoke({
    input: 'What is the weather in San Francisco?'
  })
  console.log(output)
}
await main()

Wyświetl plik

@ -0,0 +1,25 @@
#!/usr/bin/env node
import 'dotenv/config'
import { WeatherClient } from '@agentic/stdlib'
import { createLlamaIndexTools } from '@agentic/stdlib/llamaindex'
import { OpenAI, OpenAIAgent } from 'llamaindex'
/**
 * Example: converts `WeatherClient` into LlamaIndex tools, gives them to an
 * `OpenAIAgent`, and prints the content of the agent's reply to a weather
 * question.
 */
async function main() {
  const weatherClient = new WeatherClient()
  const tools = createLlamaIndexTools(weatherClient)

  const agent = new OpenAIAgent({
    llm: new OpenAI({ model: 'gpt-4o', temperature: 0 }),
    systemPrompt: 'You are a helpful assistant. Be as concise as possible.',
    tools
  })

  const chatResult = await agent.chat({
    message: 'What is the weather in San Francisco?'
  })
  const { message } = chatResult.response
  console.log(message.content)
}
await main()

Wyświetl plik

@ -0,0 +1,59 @@
#!/usr/bin/env node
import 'dotenv/config'
import { assert, WeatherClient } from '@agentic/stdlib'
import OpenAI from 'openai'
/**
 * Example: drives `WeatherClient` with the raw OpenAI SDK and no adaptor.
 *
 * Round 1 forces the model to call the weather tool and executes that call
 * locally; round 2 sends the tool result back so the model can produce a
 * text answer. Both assistant messages are printed as JSON.
 */
async function main() {
  const weather = new WeatherClient()
  const openai = new OpenAI()

  // Running conversation, extended in place between the two rounds.
  const messages: OpenAI.ChatCompletionMessageParam[] = [
    {
      role: 'system',
      content: 'You are a helpful assistant. Be as concise as possible.'
    },
    { role: 'user', content: 'What is the weather in San Francisco?' }
  ]

  {
    // First call to OpenAI to invoke the weather tool
    const completion = await openai.chat.completions.create({
      messages,
      model: 'gpt-4o',
      temperature: 0,
      tools: weather.functions.toolSpecs,
      tool_choice: 'required'
    })
    const assistantMessage = completion.choices[0]?.message!
    console.log(JSON.stringify(assistantMessage, null, 2))
    assert(
      assistantMessage.tool_calls?.[0]?.function?.name === 'get_current_weather'
    )

    const getCurrentWeather = weather.functions.get('get_current_weather')!
    assert(getCurrentWeather)

    // The raw `arguments` value from the tool call is passed straight to the
    // function.
    const toolCall = assistantMessage.tool_calls[0]
    const toolResult = await getCurrentWeather(toolCall.function.arguments)

    // Append the assistant's tool call followed by the matching tool result.
    messages.push(assistantMessage, {
      role: 'tool',
      tool_call_id: toolCall.id,
      content: JSON.stringify(toolResult)
    })
  }

  {
    // Second call to OpenAI to generate a text response
    const completion = await openai.chat.completions.create({
      messages,
      model: 'gpt-4o',
      temperature: 0,
      tools: weather.functions.toolSpecs
    })
    const finalMessage = completion.choices[0].message
    console.log(JSON.stringify(finalMessage, null, 2))
  }
}
await main()

Wyświetl plik

@ -0,0 +1,43 @@
{
"name": "agentic-examples",
"private": true,
"version": "0.1.0",
"description": "TODO",
"author": "Travis Fischer <travis@transitivebullsh.it>",
"license": "MIT",
"repository": {
"type": "git",
"url": "transitive-bullshit/agentic"
},
"packageManager": "pnpm@9.1.4",
"engines": {
"node": ">=18"
},
"type": "module",
"scripts": {
"preinstall": "npx only-allow pnpm",
"build": "tsc",
"clean": "del dist",
"prebuild": "run-s clean",
"pretest": "run-s build",
"test": "run-s test:*",
"test:typecheck": "tsc --noEmit"
},
"dependencies": {
"@agentic/stdlib": "workspace:*",
"@ai-sdk/openai": "^0.0.24",
"@dexaai/dexter": "^2.1.0",
"@genkit-ai/ai": "^0.5.2",
"@genkit-ai/core": "^0.5.2",
"@instructor-ai/instructor": "^1.3.0",
"@langchain/core": "^0.2.6",
"@langchain/openai": "^0.1.2",
"ai": "^3.1.30",
"dotenv": "^16.4.5",
"genkitx-openai": "^0.10.0",
"langchain": "^0.2.5",
"llamaindex": "^0.3.16",
"openai": "^4.49.0",
"zod": "^3.23.8"
}
}

Wyświetl plik

@ -0,0 +1,4 @@
{
"extends": "../tsconfig.json",
"include": ["ai-sdk", "dexter", "genkit", "langchain", "llamaindex", "openai"]
}

11603
json-schema.json 100644

File diff suppressed because one or more lines are too long

21
license 100644
Wyświetl plik

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Travis Fischer
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Plik binarny nie jest wyświetlany.

Po

Szerokość:  |  Wysokość:  |  Rozmiar: 406 KiB

179
package.json 100644
Wyświetl plik

@ -0,0 +1,179 @@
{
"name": "@agentic/stdlib",
"private": true,
"version": "0.1.0",
"description": "TODO",
"author": "Travis Fischer <travis@transitivebullsh.it>",
"license": "MIT",
"repository": {
"type": "git",
"url": "transitive-bullshit/agentic"
},
"packageManager": "pnpm@9.1.4",
"engines": {
"node": ">=18"
},
"type": "module",
"source": "./src/index.ts",
"types": "./dist/index.d.ts",
"sideEffects": false,
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"default": "./dist/index.js"
},
"./ai-sdk": {
"types": "./dist/sdks/ai-sdk.d.ts",
"import": "./dist/sdks/ai-sdk.js",
"default": "./dist/sdks/ai-sdk.js"
},
"./dexter": {
"types": "./dist/sdks/dexter.d.ts",
"import": "./dist/sdks/dexter.js",
"default": "./dist/sdks/dexter.js"
},
"./genkit": {
"types": "./dist/sdks/genkit.d.ts",
"import": "./dist/sdks/genkit.js",
"default": "./dist/sdks/genkit.js"
},
"./langchain": {
"types": "./dist/sdks/langchain.d.ts",
"import": "./dist/sdks/langchain.js",
"default": "./dist/sdks/langchain.js"
},
"./llamaindex": {
"types": "./dist/sdks/llamaindex.d.ts",
"import": "./dist/sdks/llamaindex.js",
"default": "./dist/sdks/llamaindex.js"
},
"./calculator": {
"types": "./dist/tools/calculator.d.ts",
"import": "./dist/tools/calculator.js",
"default": "./dist/tools/calculator.js"
},
"./e2b": {
"types": "./dist/tools/e2b.d.ts",
"import": "./dist/tools/e2b.js",
"default": "./dist/tools/e2b.js"
},
"./twitter": {
"types": "./dist/services/twitter/index.d.ts",
"import": "./dist/services/twitter/index.js",
"default": "./dist/services/twitter/index.js"
}
},
"files": [
"dist"
],
"scripts": {
"preinstall": "npx only-allow pnpm",
"build": "tsup",
"clean": "del dist",
"prebuild": "run-s clean",
"predev": "run-s clean",
"pretest": "run-s build",
"prepare": "husky",
"precommit": "lint-staged",
"test": "run-s test:*",
"test:format": "prettier --check \"**/*.{js,ts,tsx}\"",
"test:lint": "eslint .",
"test:typecheck": "tsc --noEmit",
"test:unit": "vitest run"
},
"dependencies": {
"@nangohq/node": "^0.39.33",
"dedent": "^1.5.3",
"delay": "^6.0.0",
"hash-object": "^5.0.1",
"is-relative-url": "^4.0.0",
"jsonrepair": "^3.6.1",
"ky": "^1.2.4",
"normalize-url": "^8.0.1",
"p-map": "^7.0.2",
"p-throttle": "^6.1.0",
"quick-lru": "^7.0.0",
"type-fest": "^4.19.0",
"zod": "^3.23.3",
"zod-to-json-schema": "^3.23.0"
},
"devDependencies": {
"@browserbasehq/sdk": "^1.2.1",
"@dexaai/dexter": "^2.0.3",
"@e2b/code-interpreter": "^0.0.7",
"@fisch0920/eslint-config": "^1.3.3",
"@genkit-ai/ai": "^0.5.2",
"@instructor-ai/instructor": "^1.3.0",
"@langchain/core": "^0.2.6",
"@total-typescript/ts-reset": "^0.5.1",
"@types/node": "^20.14.2",
"ai": "^3.1.30",
"del-cli": "^5.1.0",
"dotenv": "^16.4.5",
"eslint": "^8.57.0",
"expr-eval": "^2.0.2",
"husky": "^9.0.11",
"lint-staged": "^15.2.5",
"llamaindex": "^0.3.16",
"np": "^10.0.5",
"npm-run-all2": "^6.2.0",
"only-allow": "^1.2.1",
"openai-fetch": "^2.0.3",
"prettier": "^3.3.1",
"restore-cursor": "^5.0.0",
"ts-node": "^10.9.2",
"tsup": "^8.1.0",
"tsx": "^4.13.0",
"twitter-api-sdk": "^1.2.1",
"typescript": "^5.4.5",
"vitest": "2.0.0-beta.3"
},
"peerDependencies": {
"@dexaai/dexter": "^2.0.3",
"@e2b/code-interpreter": "^0.0.7",
"@genkit-ai/ai": "^0.5.2",
"@langchain/core": "^0.2.6",
"ai": "^3.1.30",
"expr-eval": "^2.0.2",
"llamaindex": "^0.3.16",
"twitter-api-sdk": "^1.2.1"
},
"peerDependenciesMeta": {
"@dexaai/dexter": {
"optional": true
},
"@e2b/code-interpreter": {
"optional": true
},
"@genkit-ai/ai": {
"optional": true
},
"@langchain/core": {
"optional": true
},
"ai": {
"optional": true
},
"expr-eval": {
"optional": true
},
"llamaindex": {
"optional": true
},
"twitter-api-sdk": {
"optional": true
}
},
"lint-staged": {
"*.{ts,tsx}": [
"eslint --fix",
"prettier --ignore-unknown --write"
]
},
"pnpm": {
"overrides": {
"esbuild": "~0.21.4"
}
}
}

12675
pnpm-lock.yaml 100644

Plik diff jest za duży Load Diff

Wyświetl plik

@ -0,0 +1,2 @@
packages:
- 'examples'

208
readme.md 100644
Wyświetl plik

@ -0,0 +1,208 @@
<p align="center">
<a href="https://trywalter.ai"><img alt="Agentic" src="/media/agentic-header.jpg" width="308"></a>
</p>
<p align="center">
<em>AI agent stdlib that works with any LLM and TypeScript AI SDK</em>
</p>
<p align="center">
<a href="https://github.com/transitive-bullshit/walter/actions/workflows/main.yml"><img alt="Build Status" src="https://github.com/transitive-bullshit/walter/actions/workflows/main.yml/badge.svg" /></a>
<a href="https://www.npmjs.com/package/@agentic/stdlib"><img alt="NPM" src="https://img.shields.io/npm/v/@agentic/stdlib.svg" /></a>
<a href="https://github.com/transitive-bullshit/walter/blob/main/license"><img alt="MIT License" src="https://img.shields.io/badge/license-MIT-blue" /></a>
<a href="https://prettier.io"><img alt="Prettier Code Formatting" src="https://img.shields.io/badge/code_style-prettier-brightgreen.svg" /></a>
</p>
# Agentic <!-- omit from toc -->
> [!WARNING]
> TODO: this project is not published yet and is an active WIP.
The goal of this project is to create a **set of standard AI functions / tools** which are **optimized for both normal TS-usage as well as LLM-based apps** and that work with all of the major AI SDKs (LangChain, LlamaIndex, Vercel AI SDK, OpenAI SDK, etc).
For example, stdlib clients like `WeatherClient` can be used as normal TS classes:
```ts
import { WeatherClient } from '@agentic/stdlib'
const weather = new WeatherClient() // (requires `WEATHER_API_KEY` env var)
const result = await weather.getCurrentWeather({
q: 'San Francisco'
})
console.log(result)
```
Or you can use them as LLM-based tools where the LLM decides when and how to invoke the underlying functions for you.
This works across all of the major AI SDKs via adaptors. Here's an example using [Vercel's AI SDK](https://github.com/vercel/ai):
```ts
// sdk-specific imports
import { openai } from '@ai-sdk/openai'
import { generateText } from 'ai'
import { createAISDKTools } from '@agentic/stdlib/ai-sdk'
// sdk-agnostic imports
import { WeatherClient } from '@agentic/stdlib'
const weather = new WeatherClient()
const result = await generateText({
model: openai('gpt-4o'),
// this is the key line which uses the `@agentic/stdlib/ai-sdk` adaptor
tools: createAISDKTools(weather),
toolChoice: 'required',
prompt: 'What is the weather in San Francisco?'
})
console.log(result.toolResults[0])
```
You can use our standard library of thoroughly tested AI functions with your favorite AI SDK – without having to write any glue code!
Here's a slightly more complex example which uses multiple clients and selects a subset of their functions using the `AIFunctionSet.pick` method:
```ts
// sdk-specific imports
import { ChatModel, createAIRunner } from '@dexaai/dexter'
import { createDexterFunctions } from '@agentic/stdlib/dexter'
// sdk-agnostic imports
import { PerigonClient, SerperClient } from '@agentic/stdlib'
async function main() {
// Perigon is a news API and Serper is a Google search API
const perigon = new PerigonClient()
const serper = new SerperClient()
const runner = createAIRunner({
chatModel: new ChatModel({
params: { model: 'gpt-4o', temperature: 0 }
}),
functions: createDexterFunctions(
perigon.functions.pick('search_news_stories'),
serper
),
systemMessage: `You are a helpful assistant. Be as concise as possible.`
})
const result = await runner(
'Summarize the latest news stories about the upcoming US election.'
)
console.log(result)
}
```
Here we've exposed 2 functions to the LLM, `search_news_stories` (which comes from the `PerigonClient.searchStories` method) and `serper_google_search` (which implicitly comes from the `SerperClient.search` method).
All of the SDK adaptors like `createDexterFunctions` are very flexible in what they accept. `AIFunctionLike` objects include:
- `AIFunctionSet` - Sets of AI functions (like `perigon.functions.pick('search_news_stories')` or `perigon.functions` or `serper.functions`)
- `AIFunctionsProvider` - Client classes which expose an `AIFunctionSet` via the `.functions` property (like `perigon` or `serper`)
- `AIFunction` - Individual functions (like `perigon.functions.get('search_news_stories')` or `serper.functions.get('serper_google_search')` or AI functions created directly via the `createAIFunction` utility function)
You can pass as many of these `AIFunctionLike` objects as you'd like and you can manipulate them as `AIFunctionSet` sets via `.pick`, `.omit`, `.get`, `.map`, etc.
All heavy third-party imports are isolated as _optional peer dependencies_ to keep the main `@agentic/stdlib` package as lightweight as possible.
## Services
| Service | Client | Description |
| ------------------------------------------------------------------------ | ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [Bing](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) | `BingClient` | Bing web search. |
| [Calculator](https://github.com/silentmatt/expr-eval) | `calculator` | Basic calculator for simple mathematical expressions. |
| [Clearbit](https://dashboard.clearbit.com/docs) | `ClearbitClient` | Resolving and enriching people and company data. |
| [Dexa](https://dexa.ai) | `DexaClient` | Answers questions from the world's best podcasters. |
| [Diffbot](https://docs.diffbot.com) | `DiffbotClient` | Web page classification and scraping; person and company data enrichment. |
| [E2B](https://e2b.dev) | `e2b` | Hosted Python code interpreter sandbox which is really useful for data analysis, flexible code execution, and advanced reasoning on-the-fly. |
| [Exa](https://docs.exa.ai) | `ExaClient` | Web search tailored for LLMs. |
| [Firecrawl](https://www.firecrawl.dev) | `FirecrawlClient` | Website scraping and sanitization. |
| [Midjourney](https://www.imagineapi.dev) | `MidjourneyClient` | Unofficial Midjourney client for generative images. |
| [Novu](https://novu.co) | `NovuClient` | Sending notifications (email, SMS, in-app, push, etc). |
| [People Data Labs](https://www.peopledatalabs.com) | `PeopleDataLabsClient` | People & company data (WIP). |
| [Perigon](https://www.goperigon.com/products/news-api) | `PerigonClient` | Real-time news API and web content data from 140,000+ sources. Structured and enriched by AI, primed for LLMs. |
| [Polygon](https://polygon.io) | `PolygonClient` | Stock market and company financial data. |
| [PredictLeads](https://predictleads.com) | `PredictLeadsClient` | In-depth company data including signals like fundraising events, hiring news, product launches, technologies used, etc. |
| [Proxycurl](https://nubela.co/proxycurl) | `ProxycurlClient` | People and company data from LinkedIn & Crunchbase. |
| Scraper | `ScraperClient` | Scrapes URLs into clean html/markdown/text content (TODO: currently closed beta). |
| [Searxng](https://docs.searxng.org) | `SearxngClient` | OSS meta search engine capable of searching across many providers like Reddit, Google, Brave, Arxiv, Genius, IMDB, Rotten Tomatoes, Wikidata, Wolfram Alpha, YouTube, GitHub, [etc](https://docs.searxng.org/user/configured_engines.html#configured-engines). |
| [SerpAPI](https://serpapi.com/search-api) | `SerpAPIClient` | Lightweight wrapper around SerpAPI for Google search. |
| [Serper](https://serper.dev) | `SerperClient` | Lightweight wrapper around Serper for Google search. |
| [Slack](https://api.slack.com/docs) | `SlackClient` | Send and receive Slack messages. |
| [Tavily](https://tavily.com) | `TavilyClient` | Web search API tailored for LLMs. 🔥 |
| [Twilio](https://www.twilio.com/docs/conversations/api) | `TwilioClient` | Twilio conversation API to send and receive SMS messages. |
| [Twitter](https://developer.x.com/en/docs/twitter-api) | `TwitterClient` | Basic Twitter API methods for fetching users, tweets, and searching recent tweets. Includes support for plan-aware rate-limiting. Uses [Nango](https://www.nango.dev) for OAuth support. |
| [WeatherAPI](https://www.weatherapi.com) | `WeatherClient` | Basic access to current weather data based on location. |
| [Wikipedia](https://www.mediawiki.org/wiki/API) | `WikipediaClient` | Wikipedia page search and summaries. |
| [Wolfram Alpha](https://products.wolframalpha.com/llm-api/documentation) | `WolframAlphaClient` | Wolfram Alpha LLM API client for answering computational, mathematical, and scientific questions. |
Note that many of these clients expose multiple AI functions.
## Compound Tools
- `SearchAndCrawl`
## AI SDKs
- OpenAI SDK
- no need for an adaptor; use `AIFunctionSet.specs` or `AIFunctionSet.toolSpecs`
- Vercel AI SDK
- `import { createAISDKTools } from '@agentic/stdlib/ai-sdk'`
- LangChain
- `import { createLangChainTools } from '@agentic/stdlib/langchain'`
- LlamaIndex
- `import { createLlamaIndexTools } from '@agentic/stdlib/llamaindex'`
- Firebase Genkit
- `import { createGenkitTools } from '@agentic/stdlib/genkit'`
- Dexa Dexter
- `import { createDexterFunctions } from '@agentic/stdlib/dexter'`
## Client Goals
- clients should be as minimal as possible
- clients should use `ky` and `zod` where possible
- clients should have a strongly-typed TS DX
- clients should expose select methods via the `@aiFunction(...)` decorator
- `inputSchema` zod schemas should be as minimal as possible with descriptions prompt engineered specifically for use with LLMs
- clients and AIFunctions should be composable via `AIFunctionSet`
- clients should work with all major TS AI SDKs
- SDK adaptors should be as lightweight as possible and be optional peer dependencies of `@agentic/stdlib`
## TODO
- rename this repo to agentic
- sdks
- modelfusion
- services
- browserbase
- [phantombuster](https://phantombuster.com)
- perplexity
- valtown
- replicate
- huggingface
- [skyvern](https://github.com/Skyvern-AI/skyvern)
- pull from [langchain](https://github.com/langchain-ai/langchainjs/tree/main/langchain)
- provide a converter for langchain `DynamicStructuredTool`
- pull from [nango](https://docs.nango.dev/integrations/overview)
- pull from [activepieces](https://github.com/activepieces/activepieces/tree/main/packages/pieces/community)
- general openapi support ala [workgpt](https://github.com/team-openpm/workgpt)
- compound tools / chains / flows / runnables
- market maps
- incorporate [zod-validation-error](https://github.com/causaly/zod-validation-error)
- investigate [autotool](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/autotool)
- investigate [data connectors](https://github.com/mendableai/data-connectors)
## Contributors
- [Travis Fischer](https://x.com/transitive_bs)
- [Kevin Raheja](https://x.com/crabfisher)
- [David Zhang](https://x.com/dzhng)
- [Philipp Burckhardt](https://x.com/burckhap)
- [Riley Tomasek](https://x.com/rileytomasek)
## License
MIT © [Travis Fischer](https://x.com/transitive_bs)
To stay up to date or learn more, follow [@transitive_bs](https://x.com/transitive_bs) on Twitter.

Wyświetl plik

@ -0,0 +1,133 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`parseArrayOutput - handles arrays surrounded by text correctly > should return ["a", "b", "c"] for "Array: ["a", "b", "c"]. That's all!" 1`] = `
[
"a",
"b",
"c",
]
`;
exports[`parseArrayOutput - handles arrays surrounded by text correctly > should return [{"a": 1}, {"b": 2}] for "This is the array [{"a": 1}, {"b": 2}] in the text" 1`] = `
[
{
"a": 1,
},
{
"b": 2,
},
]
`;
exports[`parseArrayOutput - handles arrays surrounded by text correctly > should return [1, 2, 3] for "The array is [1,2,3]" 1`] = `
[
1,
2,
3,
]
`;
exports[`parseArrayOutput - handles valid arrays correctly > should return ["a", "b", "c"] for "["a", "b", "c"] 1`] = `
[
"a",
"b",
"c",
]
`;
exports[`parseArrayOutput - handles valid arrays correctly > should return [{"a": 1}, {"b": 2}] for [{"a": 1}, {"b": 2}] 1`] = `
[
{
"a": 1,
},
{
"b": 2,
},
]
`;
exports[`parseArrayOutput - handles valid arrays correctly > should return [1, 2, 3] for "[1,2,3]" 1`] = `
[
1,
2,
3,
]
`;
exports[`parseBooleanOutput - handles \`false\` outputs correctly > should return false for "FALSE" 1`] = `false`;
exports[`parseBooleanOutput - handles \`false\` outputs correctly > should return false for "False" 1`] = `false`;
exports[`parseBooleanOutput - handles \`false\` outputs correctly > should return false for "false!" 1`] = `false`;
exports[`parseBooleanOutput - handles \`true\` outputs correctly > should return true for "TRUE" 1`] = `true`;
exports[`parseBooleanOutput - handles \`true\` outputs correctly > should return true for "True" 1`] = `true`;
exports[`parseBooleanOutput - handles \`true\` outputs correctly > should return true for "true." 1`] = `true`;
exports[`parseNumberOutput - handles float outputs correctly > should return -5.5 for " -5.5 " 1`] = `-5.5`;
exports[`parseNumberOutput - handles float outputs correctly > should return 42.42 for "42.42" 1`] = `42.42`;
exports[`parseNumberOutput - handles integer outputs correctly > should return -5 for " -5 " 1`] = `-5`;
exports[`parseNumberOutput - handles integer outputs correctly > should return 42 for "42" 1`] = `42`;
exports[`parseObjectOutput - handles JSON array of objects > should return first object {"a":1,"b":2} for [{"a":1,"b":2},{"c":3,"d":4}] 1`] = `
{
"a": 1,
"b": 2,
}
`;
exports[`parseObjectOutput - handles objects surrounded by text correctly > should return {"a":1,"b":2,"c":3} for "The object is {"a":1,"b":2,"c":3}" 1`] = `
{
"a": 1,
"b": 2,
"c": 3,
}
`;
exports[`parseObjectOutput - handles objects surrounded by text correctly > should return {"name":"John","age":30,"city":"New York"} for "Object: {"name":"John","age":30,"city":"New York"}. That's all!" 1`] = `
{
"age": 30,
"city": "New York",
"name": "John",
}
`;
exports[`parseObjectOutput - handles valid objects correctly > should return {"a":1,"b":2,"c":3} for {"a":1,"b":2,"c":3} 1`] = `
{
"a": 1,
"b": 2,
"c": 3,
}
`;
exports[`parseObjectOutput - handles valid objects correctly > should return {"name":"John","age":30,"city":"New York"} for {"name":"John","age":30,"city":"New York"} 1`] = `
{
"age": 30,
"city": "New York",
"name": "John",
}
`;
exports[`parseStructuredOutput - handles arrays correctly > should parse and return [1, 2, 3] for "[1, 2, 3]" 1`] = `
[
1,
2,
3,
]
`;
exports[`parseStructuredOutput - handles booleans correctly > should parse and return true for "True" 1`] = `true`;
exports[`parseStructuredOutput - handles numbers correctly > should parse and return 123.45 for "123.45" 1`] = `123.45`;
exports[`parseStructuredOutput - handles objects correctly > should parse and return {"a": 1, "b": "two"} for "{"a": 1, "b": "two"}" 1`] = `
{
"a": 1,
"b": "two",
}
`;

Wyświetl plik

@ -0,0 +1,15 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`sanitizeSearchParams 1`] = `"a=1&c=13"`;
exports[`sanitizeSearchParams 2`] = `"a=1&a=2&a=3"`;
exports[`sanitizeSearchParams 3`] = `"b=a&b=b&foo=true"`;
exports[`sanitizeSearchParams 4`] = `"b=false&b=true&b=false"`;
exports[`sanitizeSearchParams 5`] = `"flag=foo&flag=bar&flag=baz&token=test"`;
exports[`sanitizeSearchParams 6`] = `""`;
exports[`sanitizeSearchParams 7`] = `""`;

44
src/_utils.ts 100644
Wyświetl plik

@ -0,0 +1,44 @@
import 'dotenv/config'
import defaultKy, {
type AfterResponseHook,
type BeforeRequestHook,
type KyInstance
} from 'ky'
/** Response header used to tag responses produced by the default mock hook. */
const AGENTIC_TEST_MOCK_HEADER = 'x-agentic-test-mock'

/**
 * Default `beforeRequest` hook for `mockKyInstance`.
 *
 * Builds a synthetic `200` JSON response whose body echoes the request's
 * `url`, `method` and `headers`, and tags the response with the
 * `x-agentic-test-mock` header.
 *
 * @param request - The outgoing request handed to the hook.
 * @returns A JSON `Response` describing the request.
 */
function defaultBeforeRequest(request: Request): Response {
  // A `Headers` instance has no enumerable own properties, so handing it
  // directly to `JSON.stringify` always serializes it as `{}`. Copy the
  // entries into a plain object so the echoed body really contains them.
  const headers: Record<string, string> = {}
  request.headers.forEach((value, key) => {
    headers[key] = value
  })

  return new Response(
    JSON.stringify({
      url: request.url,
      method: request.method,
      headers
    }),
    {
      status: 200,
      headers: {
        'Content-Type': 'application/json',
        [AGENTIC_TEST_MOCK_HEADER]: '1'
      }
    }
  )
}
/**
 * Derives a ky instance whose hooks are replaced by the given mock hooks.
 *
 * @param ky - Instance to extend; defaults to the stock `ky` export.
 * @param hooks - Optional hooks. `beforeRequest` defaults to
 * `defaultBeforeRequest`; `afterResponse` is omitted unless provided.
 * @returns The extended ky instance.
 */
export function mockKyInstance(
  ky: KyInstance = defaultKy,
  {
    beforeRequest = defaultBeforeRequest,
    afterResponse
  }: {
    beforeRequest?: BeforeRequestHook
    afterResponse?: AfterResponseHook
  } = {}
): KyInstance {
  // Each hook list holds at most one entry; a falsy hook yields an empty list.
  const beforeRequestHooks: BeforeRequestHook[] = []
  if (beforeRequest) {
    beforeRequestHooks.push(beforeRequest)
  }

  const afterResponseHooks: AfterResponseHook[] = []
  if (afterResponse) {
    afterResponseHooks.push(afterResponse)
  }

  return ky.extend({
    hooks: {
      beforeRequest: beforeRequestHooks,
      afterResponse: afterResponseHooks
    }
  })
}

Wyświetl plik

@ -0,0 +1,106 @@
import type * as types from './types.ts'
import { AIFunctionsProvider } from './fns.js'
/**
 * A collection of AI functions keyed by their (normalized) name, meant to
 * simplify handling many AI functions across different clients.
 *
 * The API resembles a built-in `Set<AIFunction>` and adds helpers such as
 * `pick`, `omit`, and `map`.
 *
 * Names are normalized through `transformName`, so lookups are
 * case-insensitive, which helps when matching names hallucinated by an LLM.
 */
export class AIFunctionSet implements Iterable<types.AIFunction> {
  protected readonly _map: Map<string, types.AIFunction>

  /**
   * @param aiFunctionLikeObjects - Functions, providers, or other sets.
   * Providers and sets are flattened into their individual functions.
   */
  constructor(aiFunctionLikeObjects?: types.AIFunctionLike[]) {
    const flattened = (aiFunctionLikeObjects ?? []).flatMap((candidate) => {
      if (candidate instanceof AIFunctionsProvider) {
        return [...candidate.functions]
      }
      if (candidate instanceof AIFunctionSet) {
        return [...candidate]
      }
      return [candidate]
    })

    this._map = new Map()
    for (const aiFn of flattened) {
      // Later entries with the same normalized name replace earlier ones.
      this._map.set(transformName(aiFn.spec.name), aiFn)
    }
  }

  /** Number of functions currently stored. */
  get size(): number {
    return this._map.size
  }

  /** Stores `fn` under its own spec name and returns this set. */
  add(fn: types.AIFunction): this {
    const key = transformName(fn.spec.name)
    this._map.set(key, fn)
    return this
  }

  /** Looks up a function by name, or returns `undefined` if absent. */
  get(name: string): types.AIFunction | undefined {
    const key = transformName(name)
    return this._map.get(key)
  }

  /** Stores `fn` under an explicit `name` and returns this set. */
  set(name: string, fn: types.AIFunction): this {
    const key = transformName(name)
    this._map.set(key, fn)
    return this
  }

  /** Whether a function is stored under `name`. */
  has(name: string): boolean {
    const key = transformName(name)
    return this._map.has(key)
  }

  /** Removes every stored function. */
  clear(): void {
    this._map.clear()
  }

  /** Removes the function stored under `name`; returns whether one existed. */
  delete(name: string): boolean {
    const key = transformName(name)
    return this._map.delete(key)
  }

  /** Returns a new set holding only the functions whose names are in `keys`. */
  pick(...keys: string[]): AIFunctionSet {
    const wanted = new Set(keys.map((key) => transformName(key)))
    const kept: types.AIFunction[] = []
    for (const aiFn of this) {
      if (wanted.has(transformName(aiFn.spec.name))) {
        kept.push(aiFn)
      }
    }
    return new AIFunctionSet(kept)
  }

  /** Returns a new set holding every function whose name is not in `keys`. */
  omit(...keys: string[]): AIFunctionSet {
    const unwanted = new Set(keys.map((key) => transformName(key)))
    const kept: types.AIFunction[] = []
    for (const aiFn of this) {
      if (!unwanted.has(transformName(aiFn.spec.name))) {
        kept.push(aiFn)
      }
    }
    return new AIFunctionSet(kept)
  }

  /** Maps every stored function through `fn` and collects the results. */
  map<T>(fn: (fn: types.AIFunction) => T): T[] {
    const all = Array.from(this.entries)
    return all.map(fn)
  }

  /** The `spec` of every stored function. */
  get specs(): types.AIFunctionSpec[] {
    return this.map((aiFn) => aiFn.spec)
  }

  /** Every stored function's spec wrapped as a `function` tool spec. */
  get toolSpecs(): types.AIToolSpec[] {
    return this.map((aiFn) => {
      return { type: 'function' as const, function: aiFn.spec }
    })
  }

  /** Iterator over the stored functions. */
  get entries(): IterableIterator<types.AIFunction> {
    return this._map.values()
  }

  [Symbol.iterator](): Iterator<types.AIFunction> {
    return this.entries
  }
}
/**
 * Normalizes a function name by lower-casing it, so that names differing
 * only in letter case map to the same value.
 */
function transformName(name: string): string {
  // TODO: decamelize?
  const normalized = name.toLowerCase()
  return normalized
}

32
src/assert.ts 100644
Wyświetl plik

@ -0,0 +1,32 @@
/**
 * Lightly adapted from [tiny-invariant](https://github.com/alexreardon/tiny-invariant).
 *
 * An [assertion function](https://www.typescriptlang.org/docs/handbook/release-notes/typescript-3-7.html#assertion-functions):
 * it returns normally when `condition` is truthy and throws an `Error` when
 * `condition` is falsy, letting TypeScript narrow the checked value.
 *
 * @example
 *
 * ```ts
 * const value: Person | null = { name: 'Alex' };
 * assert(value, 'Expected value to be a person');
 * // type of `value` has been narrowed to `Person`
 * ```
 *
 * @throws Error with the provided message, or `'Assertion failed'` when no
 * message (or an undefined one) is supplied.
 */
export function assert(
  condition: any,
  /**
   * Either the message itself, or a function producing it. The function form
   * is only invoked on failure, which avoids building costly messages eagerly.
   */
  message?: string | (() => string)
): asserts condition {
  if (!condition) {
    let text: string | undefined
    if (typeof message === 'function') {
      text = message()
    } else {
      text = message
    }

    throw new Error(text ?? 'Assertion failed')
  }
}

Wyświetl plik

@ -0,0 +1,42 @@
import { describe, expect, it } from 'vitest'
import { z } from 'zod'
import { createAIFunction } from './create-ai-function.js'
// Shared fixture: an AI function that joins a first and last name, declared
// with a two-field zod object as its input schema.
const fullName = createAIFunction(
{
name: 'fullName',
description: 'Returns the full name of a person.',
inputSchema: z.object({
first: z.string(),
last: z.string()
})
},
async ({ first, last }) => {
return `${first} ${last}`
}
)
describe('createAIFunction()', () => {
// The generated `spec` should carry the name, the description, and a JSON
// schema derived from the zod input schema.
it('exposes OpenAI function calling spec', () => {
expect(fullName.spec.name).toEqual('fullName')
expect(fullName.spec.description).toEqual(
'Returns the full name of a person.'
)
expect(fullName.spec.parameters).toEqual({
properties: {
first: { type: 'string' },
last: { type: 'string' }
},
required: ['first', 'last'],
type: 'object',
additionalProperties: false
})
})
// Calling the function with a JSON arguments string should parse it and
// forward the parsed object to the implementation.
it('executes the function', async () => {
expect(await fullName('{"first": "John", "last": "Doe"}')).toEqual(
'John Doe'
)
})
})

Wyświetl plik

@ -0,0 +1,72 @@
import type { z } from 'zod'
import type * as types from './types.js'
import { parseStructuredOutput } from './parse-structured-output.js'
import { assert } from './utils.js'
import { zodToJsonSchema } from './zod-to-json-schema.js'
/**
 * Builds a callable suitable for OpenAI tool / function calling.
 *
 * The returned callable accepts either the raw arguments string or a message
 * carrying `function_call.arguments`, parses the arguments against
 * `spec.inputSchema`, and invokes `implementation` with the parsed value.
 *
 * The callable also exposes `inputSchema`, `parseInput`, `impl`, and a `spec`
 * object (name, trimmed description, JSON-schema parameters) for use in the
 * OpenAI API `functions` property.
 *
 * @throws If `spec.name`, `spec.inputSchema`, or `implementation` is missing,
 * or if `implementation` is not a function.
 */
export function createAIFunction<InputSchema extends z.ZodObject<any>, Return>(
  spec: {
    /** Name of the function. */
    name: string
    /** Description of the function. */
    description?: string
    /** Zod schema for the arguments string. */
    inputSchema: InputSchema
  },
  /** Implementation of the function to call with the parsed arguments. */
  implementation: (params: z.infer<InputSchema>) => types.MaybePromise<Return>
): types.AIFunction<InputSchema, Return> {
  // Validate the required pieces up front.
  assert(spec.name, 'createAIFunction missing required "spec.name"')
  assert(
    spec.inputSchema,
    'createAIFunction missing required "spec.inputSchema"'
  )
  assert(implementation, 'createAIFunction missing required "implementation"')
  assert(
    typeof implementation === 'function',
    'createAIFunction "implementation" must be a function'
  )

  /** Parse the arguments string, optionally reading it from a message. */
  const parseInput = (input: string | types.Msg) => {
    if (typeof input !== 'string') {
      // Message form: the arguments live on `function_call.arguments`.
      const args = input.function_call?.arguments
      assert(
        args,
        `Missing required function_call.arguments for function ${spec.name}`
      )
      return parseStructuredOutput(args, spec.inputSchema)
    }

    return parseStructuredOutput(input, spec.inputSchema)
  }

  // The callable itself: parse first, then delegate to the implementation.
  const aiFunction: types.AIFunction<InputSchema, Return> = (
    input: string | types.Msg
  ) => implementation(parseInput(input))

  const functionSpec = {
    name: spec.name,
    description: spec.description?.trim() ?? '',
    parameters: zodToJsonSchema(spec.inputSchema)
  }

  aiFunction.inputSchema = spec.inputSchema
  aiFunction.parseInput = parseInput
  aiFunction.spec = functionSpec
  aiFunction.impl = implementation

  return aiFunction
}

5
src/errors.ts 100644
Wyświetl plik

@ -0,0 +1,5 @@
/**
 * Base class for errors flagged as retryable.
 *
 * `name` is set explicitly so that `err.name` and `String(err)` identify the
 * concrete error class instead of the generic `Error`.
 */
export class RetryableError extends Error {
  override name = 'RetryableError'
}

/** A parsing failure; it extends `RetryableError`. */
export class ParseError extends RetryableError {
  override name = 'ParseError'
}

/** A timeout failure; it extends `Error` directly, not `RetryableError`. */
export class TimeoutError extends Error {
  override name = 'TimeoutError'
}

106
src/fns.ts 100644
Wyświetl plik

@ -0,0 +1,106 @@
import type { z } from 'zod'
import type * as types from './types.js'
import { AIFunctionSet } from './ai-function-set.js'
import { createAIFunction } from './create-ai-function.js'
import { assert } from './utils.js'
/**
 * Metadata recorded by the `aiFunction` decorator for each decorated method
 * and later read back by `AIFunctionsProvider.functions`.
 */
export interface PrivateAIFunctionMetadata {
/** Function name; the decorator falls back to the method name when none is given. */
name: string
/** Description passed to the decorator. */
description: string
/** Zod object schema passed to the decorator. */
inputSchema: z.AnyZodObject
/** Name of the decorated method, used to look up the implementation on the instance. */
methodName: string
}
// Polyfill for `Symbol.metadata`
// https://github.com/microsoft/TypeScript/issues/53461
// Type-level part: declare `Symbol.metadata` so it can be referenced below.
declare global {
interface SymbolConstructor {
readonly metadata: unique symbol
}
}
// Runtime part: only assigns when `Symbol.metadata` is null or undefined.
;(Symbol as any).metadata ??= Symbol.for('Symbol.metadata')
// Prototype-less object installed on `globalThis` under the metadata symbol.
const _metadata = Object.create(null)
if (typeof Symbol === 'function' && Symbol.metadata) {
Object.defineProperty(globalThis, Symbol.metadata, {
enumerable: true,
configurable: true,
writable: true,
value: _metadata
})
}
/**
 * Base class for objects that expose some of their methods as AI functions.
 *
 * Methods registered through the `aiFunction` decorator are collected from
 * the class's decorator metadata and exposed as an `AIFunctionSet`.
 */
export abstract class AIFunctionsProvider {
  private _functions?: AIFunctionSet

  /**
   * The AI functions of this instance. The set is built on first access and
   * the same set is returned on later accesses.
   *
   * @throws If the class has no decorator metadata, or if a registered method
   * name does not resolve to a value on this instance.
   */
  get functions(): AIFunctionSet {
    if (this._functions) {
      return this._functions
    }

    const metadata = this.constructor[Symbol.metadata]
    assert(metadata)

    const registered =
      (metadata.invocables as PrivateAIFunctionMetadata[]) ?? []
    // console.log({ metadata, invocables })

    const built = registered.map((entry) => {
      // Resolve the implementation by method name on this instance.
      const method = (this as any)[entry.methodName]
      assert(method)
      return createAIFunction(entry, method)
    })

    this._functions = new AIFunctionSet(built)
    return this._functions
  }
}
/**
 * Method decorator factory that registers a method of an
 * `AIFunctionsProvider` subclass as an AI function.
 *
 * The decorator appends a `PrivateAIFunctionMetadata` entry to the
 * `invocables` list on the class's decorator metadata and adds an initializer
 * that binds the method to the instance.
 *
 * @param options.name - Optional function name; defaults to the method name.
 * @param options.description - Description of the function.
 * @param options.inputSchema - Zod object schema for the function's input.
 * @returns The method decorator.
 */
export function aiFunction<
  This extends AIFunctionsProvider,
  InputSchema extends z.SomeZodObject,
  OptionalArgs extends Array<undefined>,
  Return extends types.MaybePromise<any>
>({
  name,
  description,
  inputSchema
}: {
  name?: string
  description: string
  inputSchema: InputSchema
}) {
  return (
    _targetMethod: (
      this: This,
      input: z.infer<InputSchema>,
      ...optionalArgs: OptionalArgs
    ) => Return,
    context: ClassMethodDecoratorContext<
      This,
      (
        this: This,
        input: z.infer<InputSchema>,
        ...optionalArgs: OptionalArgs
      ) => Return
    >
  ) => {
    const methodName = String(context.name)
    const metadata = context.metadata

    // A subclass's metadata object inherits from its parent's metadata, so a
    // plain truthiness check would find the parent's `invocables` array and
    // push the subclass's methods into it. Give each class its own array,
    // seeded with whatever it inherited, so parents are never mutated.
    const existing = metadata.invocables as
      | PrivateAIFunctionMetadata[]
      | undefined
    const hasOwnList = Object.prototype.hasOwnProperty.call(
      metadata,
      'invocables'
    )
    if (!existing || !hasOwnList) {
      metadata.invocables = existing ? [...existing] : []
    }

    ;(metadata.invocables as PrivateAIFunctionMetadata[]).push({
      name: name ?? methodName,
      description,
      inputSchema,
      methodName
    })

    // Bind the method so it keeps its `this` when called detached.
    context.addInitializer(function () {
      ;(this as any)[methodName] = (this as any)[methodName].bind(this)
    })
  }
}

12
src/index.ts 100644
Wyświetl plik

@ -0,0 +1,12 @@
export * from './ai-function-set.js'
export * from './create-ai-function.js'
export * from './errors.js'
export * from './fns.js'
export * from './message.js'
export * from './nango.js'
export * from './parse-structured-output.js'
export * from './services/index.js'
export * from './tools/search-and-crawl.js'
export type * from './types.js'
export * from './utils.js'
export * from './zod-to-json-schema.js'

Wyświetl plik

@ -0,0 +1,55 @@
import type * as OpenAI from 'openai-fetch'
import { describe, expect, expectTypeOf, it } from 'vitest'
import type * as types from './types.js'
import { Msg } from './message.js'
// Tests for the `Msg` message helpers: creation, tool call/result guards, and
// type-level interop with the `openai-fetch` message types.
describe('Msg', () => {
// By default the content is cleaned before being stored on the message.
it('creates a message and fixes indentation', () => {
const msgContent = `
Hello, World!
`
const msg = Msg.system(msgContent)
expect(msg.role).toEqual('system')
expect(msg.content).toEqual('Hello, World!')
})
// With `cleanContent: false` the content is kept verbatim.
it('supports disabling indentation fixing', () => {
const msgContent = `
Hello, World!
`
const msg = Msg.system(msgContent, { cleanContent: false })
expect(msg.content).toEqual('\n Hello, World!\n ')
})
// A message built by `Msg.toolCall` must satisfy the `isToolCall` guard.
it('handles tool calls request', () => {
const msg = Msg.toolCall([
{
id: 'fake-tool-call-id',
type: 'function',
function: {
arguments: '{"prompt": "Hello, World!"}',
name: 'hello'
}
}
])
expectTypeOf(msg).toMatchTypeOf<types.Msg.ToolCall>()
expect(Msg.isToolCall(msg)).toBe(true)
})
// A message built by `Msg.toolResult` must satisfy the `isToolResult` guard.
it('handles tool call response', () => {
const msg = Msg.toolResult('Hello, World!', 'fake-tool-call-id')
expectTypeOf(msg).toMatchTypeOf<types.Msg.ToolResult>()
expect(Msg.isToolResult(msg)).toBe(true)
})
// Type-only checks: the local message types and `OpenAI.ChatMessage` should
// be mutually assignable.
it('prompt message types should interop with openai-fetch message types', () => {
expectTypeOf({} as OpenAI.ChatMessage).toMatchTypeOf<types.Msg>()
expectTypeOf({} as types.Msg).toMatchTypeOf<OpenAI.ChatMessage>()
expectTypeOf({} as types.Msg.System).toMatchTypeOf<OpenAI.ChatMessage>()
expectTypeOf({} as types.Msg.User).toMatchTypeOf<OpenAI.ChatMessage>()
expectTypeOf({} as types.Msg.Assistant).toMatchTypeOf<OpenAI.ChatMessage>()
expectTypeOf({} as types.Msg.FuncCall).toMatchTypeOf<OpenAI.ChatMessage>()
expectTypeOf({} as types.Msg.FuncResult).toMatchTypeOf<OpenAI.ChatMessage>()
})
})

344
src/message.ts 100644
Wyświetl plik

@ -0,0 +1,344 @@
import type { Jsonifiable } from 'type-fest'
import { cleanStringForModel, stringifyForModel } from './utils.js'
/**
* Generic/default OpenAI message without any narrowing applied.
*
* Every field other than `content` and `role` is optional here; the narrowed
* variants live in the `Msg` namespace.
*/
export interface Msg {
/**
* The contents of the message. `content` is required for all messages, and
* may be null for assistant messages with function calls.
*/
content: string | null
/**
* The role of the message's author. One of `system`, `user`, `assistant`,
* `tool`, or `function`.
*/
role: Msg.Role
/**
* The name and arguments of a function that should be called, as generated
* by the model.
*/
function_call?: Msg.Call.Function
/**
* The tool calls generated by the model, such as function calls.
*/
tool_calls?: Msg.Call.Tool[]
/**
* ID of the tool call that this message is responding to.
*/
tool_call_id?: string
/**
* The name of the author of this message. `name` is required if role is
* `function`, and it should be the name of the function whose response is in the
* `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of
* 64 characters.
*/
name?: string
}
/** Narrowed OpenAI Message types. */
export namespace Msg {
/** Possible roles for a message. */
export type Role = 'system' | 'user' | 'assistant' | 'function' | 'tool'
/** Payload shapes for the function and tool calls carried on messages. */
export namespace Call {
/**
* The name and arguments of a function that should be called, as generated
* by the model.
*/
export type Function = {
/**
* The arguments to call the function with, as generated by the model in
* JSON format.
*/
arguments: string
/** The name of the function to call. */
name: string
}
/** A single tool call generated by the model, such as a function call. */
export type Tool = {
/** The ID of the tool call. */
id: string
/** The type of the tool. Currently, only `function` is supported. */
type: 'function'
/** The function that the model called. */
function: Call.Function
}
}
/** Message with text content for the system. */
export type System = {
role: 'system'
content: string
name?: string
}
/** Message with text content from the user. */
export type User = {
role: 'user'
name?: string
content: string
}
/** Message with text content from the assistant. */
export type Assistant = {
role: 'assistant'
name?: string
content: string
}
/** Assistant message with null content that carries a single function call. */
export type FuncCall = {
role: 'assistant'
name?: string
content: null
function_call: Call.Function
}
/** Message with the result of a function call; `name` is required. */
export type FuncResult = {
role: 'function'
name: string
content: string
}
/** Assistant message with null content that carries one or more tool calls. */
export type ToolCall = {
role: 'assistant'
name?: string
content: null
tool_calls: Call.Tool[]
}
/** Message with the result of a tool call, linked by `tool_call_id`. */
export type ToolResult = {
role: 'tool'
tool_call_id: string
content: string
}
}
/** Utility functions for creating and checking message types. */
export namespace Msg {
  /**
   * Create a system message.
   *
   * @param content - Message text.
   * @param opts.name - Optional custom name for the message; omitted from the
   * result when empty.
   * @param opts.cleanContent - Whether to pass the content through
   * `cleanStringForModel` first. Defaults to true.
   */
  export function system(
    content: string,
    opts?: {
      /** Custom name for the message. */
      name?: string
      /** Whether to clean extra newlines and indentation. Defaults to true. */
      cleanContent?: boolean
    }
  ): Msg.System {
    const { name, cleanContent = true } = opts ?? {}
    return {
      role: 'system',
      content: cleanContent ? cleanStringForModel(content) : content,
      ...(name ? { name } : {})
    }
  }

  /**
   * Create a user message.
   *
   * @param content - Message text.
   * @param opts.name - Optional custom name for the message; omitted from the
   * result when empty.
   * @param opts.cleanContent - Whether to pass the content through
   * `cleanStringForModel` first. Defaults to true.
   */
  export function user(
    content: string,
    opts?: {
      /** Custom name for the message. */
      name?: string
      /** Whether to clean extra newlines and indentation. Defaults to true. */
      cleanContent?: boolean
    }
  ): Msg.User {
    const { name, cleanContent = true } = opts ?? {}
    return {
      role: 'user',
      content: cleanContent ? cleanStringForModel(content) : content,
      ...(name ? { name } : {})
    }
  }

  /**
   * Create an assistant message.
   *
   * @param content - Message text.
   * @param opts.name - Optional custom name for the message; omitted from the
   * result when empty.
   * @param opts.cleanContent - Whether to pass the content through
   * `cleanStringForModel` first. Defaults to true.
   */
  export function assistant(
    content: string,
    opts?: {
      /** Custom name for the message. */
      name?: string
      /** Whether to clean extra newlines and indentation. Defaults to true. */
      cleanContent?: boolean
    }
  ): Msg.Assistant {
    const { name, cleanContent = true } = opts ?? {}
    return {
      role: 'assistant',
      content: cleanContent ? cleanStringForModel(content) : content,
      ...(name ? { name } : {})
    }
  }

  /** Create a function call message with arguments. */
  export function funcCall(
    function_call: {
      /** Name of the function to call. */
      name: string
      /** Arguments to pass to the function. */
      arguments: string
    },
    opts?: {
      /** The name descriptor for the message (`message.name`). */
      name?: string
    }
  ): Msg.FuncCall {
    // `opts` is spread first so the fixed fields below always win.
    return {
      ...opts,
      role: 'assistant',
      content: null,
      function_call
    }
  }

  /**
   * Create a function result message.
   *
   * @param content - Result value; serialized with `stringifyForModel`.
   * @param name - Name of the function that produced the result.
   */
  export function funcResult(
    content: Jsonifiable,
    name: string
  ): Msg.FuncResult {
    const contentString = stringifyForModel(content)
    return { role: 'function', content: contentString, name }
  }

  /** Create a tool call message carrying one or more tool calls. */
  export function toolCall(
    tool_calls: Msg.Call.Tool[],
    opts?: {
      /** The name descriptor for the message (`message.name`). */
      name?: string
    }
  ): Msg.ToolCall {
    // `opts` is spread first so the fixed fields below always win.
    return {
      ...opts,
      role: 'assistant',
      content: null,
      tool_calls
    }
  }

  /**
   * Create a tool call result message.
   *
   * @param content - Result value; serialized with `stringifyForModel`.
   * @param tool_call_id - ID of the tool call being answered.
   */
  export function toolResult(
    content: Jsonifiable,
    tool_call_id: string,
    opts?: {
      /** The name of the tool which was called */
      name?: string
    }
  ): Msg.ToolResult {
    const contentString = stringifyForModel(content)
    return { ...opts, role: 'tool', tool_call_id, content: contentString }
  }

  /**
   * Get the narrowed message from an EnrichedResponse.
   *
   * Reads `response.choices[0].message` and narrows it with
   * `narrowResponseMessage`.
   */
  export function getMessage(
    // @TODO
    response: any
    // response: ChatModel.EnrichedResponse
  ): Msg.Assistant | Msg.FuncCall | Msg.ToolCall {
    const msg = response.choices[0].message as Msg
    return narrowResponseMessage(msg)
  }

  /**
   * Narrow a message received from the API. It only responds with
   * role=assistant.
   *
   * A missing (`undefined`) `content` is treated the same as `null`, so a
   * response that omits `content` while carrying `tool_calls` or
   * `function_call` is still recognised as a call message instead of being
   * mistaken for assistant text.
   *
   * @throws Error('Invalid message') when the message has no content and no
   * tool or function call.
   */
  export function narrowResponseMessage(
    msg: Msg
  ): Msg.Assistant | Msg.FuncCall | Msg.ToolCall {
    const hasContent = msg.content != null
    if (!hasContent && msg.tool_calls != null) {
      return Msg.toolCall(msg.tool_calls)
    } else if (!hasContent && msg.function_call != null) {
      return Msg.funcCall(msg.function_call)
    } else if (msg.content != null) {
      return Msg.assistant(msg.content)
    } else {
      // @TODO: probably don't want to error here
      console.log('Invalid message', msg)
      throw new Error('Invalid message')
    }
  }

  /** Check if a message is a system message. */
  export function isSystem(message: Msg): message is Msg.System {
    return message.role === 'system'
  }

  /** Check if a message is a user message. */
  export function isUser(message: Msg): message is Msg.User {
    return message.role === 'user'
  }

  /** Check if a message is an assistant message. */
  export function isAssistant(message: Msg): message is Msg.Assistant {
    return message.role === 'assistant' && message.content !== null
  }

  /** Check if a message is a function call message with arguments. */
  export function isFuncCall(message: Msg): message is Msg.FuncCall {
    return message.role === 'assistant' && message.function_call != null
  }

  /** Check if a message is a function result message. */
  export function isFuncResult(message: Msg): message is Msg.FuncResult {
    return message.role === 'function' && message.name != null
  }

  /** Check if a message is a tool calls message. */
  export function isToolCall(message: Msg): message is Msg.ToolCall {
    return message.role === 'assistant' && message.tool_calls != null
  }

  /** Check if a message is a tool call result message. */
  export function isToolResult(message: Msg): message is Msg.ToolResult {
    return message.role === 'tool' && !!message.tool_call_id
  }

  /**
   * Narrow a ChatModel.Message to a specific type.
   *
   * The guards are tried in a fixed order (system, user, assistant, function
   * call, function result, tool call, tool result) and the same message
   * object is returned once one matches.
   *
   * @throws Error('Invalid message type') when no guard matches.
   */
  export function narrow(message: Msg.System): Msg.System
  export function narrow(message: Msg.User): Msg.User
  export function narrow(message: Msg.Assistant): Msg.Assistant
  export function narrow(message: Msg.FuncCall): Msg.FuncCall
  export function narrow(message: Msg.FuncResult): Msg.FuncResult
  export function narrow(message: Msg.ToolCall): Msg.ToolCall
  export function narrow(message: Msg.ToolResult): Msg.ToolResult
  // Catch-all overload so an un-narrowed `Msg` can be passed in as well.
  export function narrow(
    message: Msg
  ):
    | Msg.System
    | Msg.User
    | Msg.Assistant
    | Msg.FuncCall
    | Msg.FuncResult
    | Msg.ToolCall
    | Msg.ToolResult
  export function narrow(
    message: Msg
  ):
    | Msg.System
    | Msg.User
    | Msg.Assistant
    | Msg.FuncCall
    | Msg.FuncResult
    | Msg.ToolCall
    | Msg.ToolResult {
    if (isSystem(message)) {
      return message
    }
    if (isUser(message)) {
      return message
    }
    if (isAssistant(message)) {
      return message
    }
    if (isFuncCall(message)) {
      return message
    }
    if (isFuncResult(message)) {
      return message
    }
    if (isToolCall(message)) {
      return message
    }
    if (isToolResult(message)) {
      return message
    }
    throw new Error('Invalid message type')
  }
}

47
src/nango.ts 100644
Wyświetl plik

@ -0,0 +1,47 @@
import { type Connection, Nango } from '@nangohq/node'
import { getEnv } from './utils.js'
// Module-level singleton on purpose: it avoids re-creating the Nango
// connection instance on successive calls in serverless environments.
let _nango: Nango | null = null

/**
 * Returns the shared Nango client, creating it on first use from the
 * `NANGO_SECRET_KEY` environment value (trimmed).
 *
 * @throws If `NANGO_SECRET_KEY` is missing or blank.
 */
export function getNango(): Nango {
  if (_nango) {
    return _nango
  }

  const secretKey = getEnv('NANGO_SECRET_KEY')?.trim()
  if (!secretKey) {
    throw new Error(`Missing required "NANGO_SECRET_KEY"`)
  }

  _nango = new Nango({ secretKey })
  return _nango
}
/**
 * Verifies that a Nango connection was granted every required OAuth scope.
 *
 * Granted scopes are read from `connection.credentials.raw.scope`, a
 * space-separated string. When that value is absent or not a string, the
 * connection is treated as having no granted scopes, so the caller gets the
 * descriptive "missing required OAuth scopes" error rather than a `TypeError`.
 *
 * @param connection - The Nango connection to inspect.
 * @param scopes - The scopes that must all be present.
 * @throws Error listing the missing scopes (deduplicated, in request order).
 */
export function validateNangoConnectionOAuthScopes({
  connection,
  scopes
}: {
  connection: Connection
  scopes: string[]
}): void {
  const rawScope: unknown = connection.credentials?.raw?.scope
  const connectionScopes = new Set<string>(
    typeof rawScope === 'string' ? rawScope.split(' ') : []
  )

  // A Set keeps each missing scope once, in the order it was requested.
  const missingScopes = new Set<string>()
  for (const scope of scopes) {
    if (!connectionScopes.has(scope)) {
      missingScopes.add(scope)
    }
  }

  if (missingScopes.size > 0) {
    throw new Error(
      `Nango connection ${connection.id} is missing required OAuth scopes: ${[
        ...missingScopes.values()
      ].join(', ')}`
    )
  }
}

Wyświetl plik

@ -0,0 +1,273 @@
import { assert, expect, test } from 'vitest'
import { z } from 'zod'
import {
extractJSONFromString,
parseArrayOutput,
parseBooleanOutput,
parseNumberOutput,
parseObjectOutput,
parseStructuredOutput
} from './parse-structured-output.js'
// --- extractJSONFromString ---
// Well-formed objects embedded in surrounding text, including nested objects
// and string values that contain an apostrophe.
test('extractJSONFromString should extract JSON object from string', () => {
let jsonStr = 'Some text {"name":"John Doe"} more text'
let result = extractJSONFromString(jsonStr, 'object')
assert.deepEqual(result[0], { name: 'John Doe' })
jsonStr =
'Some text {"name":"John Doe","age":42,"address":{"street":"Main Street","number":42}} more text'
result = extractJSONFromString(jsonStr, 'object')
assert.deepEqual(result[0], {
name: 'John Doe',
age: 42,
address: { street: 'Main Street', number: 42 }
})
jsonStr = 'foo {"name":"John Doe","school":"St. John\'s"} bar'
result = extractJSONFromString(jsonStr, 'object')
assert.deepEqual(result[0], { name: 'John Doe', school: "St. John's" })
})
// Not-quite-JSON objects (single-quoted value, trailing comma) are still
// expected to be recovered.
test('extractJSONFromString should extract an invalid JSON object from string', () => {
let jsonStr = 'Some text {"name":\'John Doe\'} more text'
let result = extractJSONFromString(jsonStr, 'object')
assert.deepEqual(result[0], { name: 'John Doe' })
jsonStr = 'Some text {"name":"John Doe","age":42,} more text'
result = extractJSONFromString(jsonStr, 'object')
assert.deepEqual(result[0], { name: 'John Doe', age: 42 })
})
// Several objects in one string are returned in order of appearance.
test('extractJSONFromString should extract multiple JSON objects from string', () => {
let jsonStr = 'Some text {"name":"John Doe"} more text {"name":"Jane Doe"}'
let result = extractJSONFromString(jsonStr, 'object')
assert.deepEqual(result[0], { name: 'John Doe' })
assert.deepEqual(result[1], { name: 'Jane Doe' })
jsonStr =
'Some text {"name":"John Doe","age":42,"address":{"street":"Main Street","number":42}} more text {"name":"Jane Doe","age":42,"address":{"street":"Main Street","number":42}}'
result = extractJSONFromString(jsonStr, 'object')
assert.deepEqual(result[0], {
name: 'John Doe',
age: 42,
address: { street: 'Main Street', number: 42 }
})
assert.deepEqual(result[1], {
name: 'Jane Doe',
age: 42,
address: { street: 'Main Street', number: 42 }
})
})
// Well-formed arrays embedded in surrounding text.
test('extractJSONFromString should extract JSON array from string', () => {
let jsonString = 'Some text [1,2,3] more text'
let result = extractJSONFromString(jsonString, 'array')
assert.deepEqual(result[0], [1, 2, 3])
jsonString = 'Some text ["foo","bar","\'quoted\'"] more text'
result = extractJSONFromString(jsonString, 'array')
assert.deepEqual(result[0], ['foo', 'bar', "'quoted'"])
})
// Not-quite-JSON arrays (trailing comma, single-quoted strings) are still
// expected to be recovered.
test('extractJSONFromString should extract an invalid JSON array from string', () => {
let jsonString = 'Some text [1,2,3,] more text'
let result = extractJSONFromString(jsonString, 'array')
assert.deepEqual(result[0], [1, 2, 3])
jsonString = "Some text ['foo','bar'] more text"
result = extractJSONFromString(jsonString, 'array')
assert.deepEqual(result[0], ['foo', 'bar'])
})
// Several arrays in one string are returned in order of appearance.
test('extractJSONFromString should extract multiple JSON arrays from string', () => {
const jsonString = 'Some text [1,2,3] more text [4,5,6]'
const result = extractJSONFromString(jsonString, 'array')
assert.deepEqual(result[0], [1, 2, 3])
assert.deepEqual(result[1], [4, 5, 6])
})
// No match yields an empty result list rather than an error.
test('extractJSONFromString should return an empty array if no JSON object is found', () => {
const jsonString = 'Some text'
const result = extractJSONFromString(jsonString, 'object')
assert.deepEqual(result, [])
})
test('extractJSONFromString should return an empty array if no JSON array is found', () => {
const jsonString = 'Some text'
const result = extractJSONFromString(jsonString, 'array')
assert.deepEqual(result, [])
})
// --- parseArrayOutput ---
// Snapshot names below are part of the stored snapshot keys; keep them stable.
// Input that is exactly a JSON array.
test('parseArrayOutput - handles valid arrays correctly', () => {
const output1 = parseArrayOutput('[1,2,3]')
const output2 = parseArrayOutput('["a", "b", "c"]')
const output3 = parseArrayOutput('[{"a": 1}, {"b": 2}]')
expect(output1).toMatchSnapshot('should return [1, 2, 3] for "[1,2,3]"')
expect(output2).toMatchSnapshot(
'should return ["a", "b", "c"] for "["a", "b", "c"]'
)
expect(output3).toMatchSnapshot(
'should return [{"a": 1}, {"b": 2}] for [{"a": 1}, {"b": 2}]'
)
})
// Arrays wrapped in extra prose before and/or after.
test('parseArrayOutput - handles arrays surrounded by text correctly', () => {
const output1 = parseArrayOutput('The array is [1,2,3]')
const output2 = parseArrayOutput('Array: ["a", "b", "c"]. That\'s all!')
const output3 = parseArrayOutput(
'This is the array [{"a": 1}, {"b": 2}] in the text'
)
expect(output1).toMatchSnapshot(
'should return [1, 2, 3] for "The array is [1,2,3]"'
)
expect(output2).toMatchSnapshot(
'should return ["a", "b", "c"] for "Array: ["a", "b", "c"]. That\'s all!"'
)
expect(output3).toMatchSnapshot(
'should return [{"a": 1}, {"b": 2}] for "This is the array [{"a": 1}, {"b": 2}] in the text"'
)
})
// Text containing no array must throw.
test('parseArrayOutput - throws error for invalid arrays', () => {
assert.throws(() => {
parseArrayOutput('not a valid array')
})
})
// --- parseObjectOutput ---
// Snapshot names below are part of the stored snapshot keys; keep them stable.
// Input that is exactly a JSON object.
test('parseObjectOutput - handles valid objects correctly', () => {
const output1 = parseObjectOutput('{"a":1,"b":2,"c":3}')
const output2 = parseObjectOutput(
'{"name":"John","age":30,"city":"New York"}'
)
expect(output1).toMatchSnapshot(
'should return {"a":1,"b":2,"c":3} for {"a":1,"b":2,"c":3}'
)
expect(output2).toMatchSnapshot(
'should return {"name":"John","age":30,"city":"New York"} for {"name":"John","age":30,"city":"New York"}'
)
})
// Objects wrapped in extra prose before and/or after.
test('parseObjectOutput - handles objects surrounded by text correctly', () => {
const output1 = parseObjectOutput('The object is {"a":1,"b":2,"c":3}')
const output2 = parseObjectOutput(
'Object: {"name":"John","age":30,"city":"New York"}. That\'s all!'
)
expect(output1).toMatchSnapshot(
'should return {"a":1,"b":2,"c":3} for "The object is {"a":1,"b":2,"c":3}"'
)
expect(output2).toMatchSnapshot(
'should return {"name":"John","age":30,"city":"New York"} for "Object: {"name":"John","age":30,"city":"New York"}. That\'s all!"'
)
})
// When given an array of objects, the first object is the expected result.
test('parseObjectOutput - handles JSON array of objects', () => {
const output = parseObjectOutput('[{"a":1,"b":2},{"c":3,"d":4}]')
expect(output).toMatchSnapshot(
'should return first object {"a":1,"b":2} for [{"a":1,"b":2},{"c":3,"d":4}]'
)
})
// Text containing no object must throw.
test('parseObjectOutput - throws error for invalid objects', () => {
assert.throws(() => {
parseObjectOutput('not a valid object')
})
})
// Truthy spellings: mixed case, upper case, and trailing punctuation.
test('parseBooleanOutput - handles `true` outputs correctly', () => {
const output1 = parseBooleanOutput('True')
const output2 = parseBooleanOutput('TRUE')
const output3 = parseBooleanOutput('true.')
expect(output1).toMatchSnapshot('should return true for "True"')
expect(output2).toMatchSnapshot('should return true for "TRUE"')
expect(output3).toMatchSnapshot('should return true for "true."')
})
// Falsy spellings: mixed case, upper case, and trailing punctuation.
test('parseBooleanOutput - handles `false` outputs correctly', () => {
const output1 = parseBooleanOutput('False')
const output2 = parseBooleanOutput('FALSE')
const output3 = parseBooleanOutput('false!')
expect(output1).toMatchSnapshot('should return false for "False"')
expect(output2).toMatchSnapshot('should return false for "FALSE"')
expect(output3).toMatchSnapshot('should return false for "false!"')
})
// A string that is not a recognised boolean spelling must make the parser throw.
test('parseBooleanOutput - throws error for invalid outputs', () => {
assert.throws(() => {
parseBooleanOutput('NotBooleanValue')
})
})
// Integer schema (`z.number().int()`), including whitespace-padded negative input.
test('parseNumberOutput - handles integer outputs correctly', () => {
const output1 = parseNumberOutput('42', z.number().int())
const output2 = parseNumberOutput(' -5 ', z.number().int())
expect(output1).toMatchSnapshot('should return 42 for "42"')
expect(output2).toMatchSnapshot('should return -5 for " -5 "')
})
// Plain number schema, so fractional values are expected to be preserved.
test('parseNumberOutput - handles float outputs correctly', () => {
const output1 = parseNumberOutput('42.42', z.number())
const output2 = parseNumberOutput(' -5.5 ', z.number())
expect(output1).toMatchSnapshot('should return 42.42 for "42.42"')
expect(output2).toMatchSnapshot('should return -5.5 for " -5.5 "')
})
// Non-numeric text must make the parser throw.
test('parseNumberOutput - throws error for invalid outputs', () => {
assert.throws(() => {
parseNumberOutput('NotANumber', z.number())
})
})
// The following tests exercise parseStructuredOutput once per supported schema
// kind (array, object, boolean, number) and once with invalid input.
test('parseStructuredOutput - handles arrays correctly', () => {
const arraySchema = z.array(z.number())
const output = '[1, 2, 3]'
const result = parseStructuredOutput(output, arraySchema)
expect(result).toMatchSnapshot(
'should parse and return [1, 2, 3] for "[1, 2, 3]"'
)
})
test('parseStructuredOutput - handles objects correctly', () => {
const objectSchema = z.object({ a: z.number(), b: z.string() })
const output = '{"a": 1, "b": "two"}'
const result = parseStructuredOutput(output, objectSchema)
expect(result).toMatchSnapshot(
'should parse and return {"a": 1, "b": "two"} for "{"a": 1, "b": "two"}"'
)
})
test('parseStructuredOutput - handles booleans correctly', () => {
const booleanSchema = z.boolean()
const output = 'True'
const result = parseStructuredOutput(output, booleanSchema)
expect(result).toMatchSnapshot('should parse and return true for "True"')
})
test('parseStructuredOutput - handles numbers correctly', () => {
const numberSchema = z.number()
const output = '123.45'
const result = parseStructuredOutput(output, numberSchema)
expect(result).toMatchSnapshot('should parse and return 123.45 for "123.45"')
})
// A number schema combined with non-numeric text must throw.
test('parseStructuredOutput - throws error for invalid data', () => {
const numberSchema = z.number()
const output = 'not a number'
assert.throws(() => {
parseStructuredOutput(output, numberSchema)
})
})

Wyświetl plik

@ -0,0 +1,283 @@
import type { JsonValue } from 'type-fest'
import { jsonrepair, JSONRepairError } from 'jsonrepair'
import { z, type ZodType } from 'zod'
import { ParseError } from './errors.js'
/**
 * Outcome of a non-throwing parse attempt, discriminated on `success`.
 *
 * The `?: never` members make the two variants mutually exclusive: a
 * successful result can only carry `data`, a failed one only an `error`
 * message.
 */
export type SafeParseResult<T> =
| {
success: true
data: T
error?: never
}
| {
success: false
data?: never
error: string
}
/**
 * Turns raw text into a value that satisfies the given `zod` schema.
 *
 * The schema's top-level kind selects a lenient pre-parser (array, object,
 * boolean or number); any other schema receives the text unchanged. The
 * pre-parsed value is then validated with the schema itself.
 *
 * @param output - string to parse
 * @param outputSchema - zod schema describing the expected value
 *
 * @returns the validated value
 * @throws ParseError when schema validation fails (the pre-parsers may throw
 * as well)
 */
export function parseStructuredOutput<T>(
  output: string,
  outputSchema: ZodType<T>
): T {
  // Pick the pre-parser matching the schema kind; plain text is the fallback.
  const preParse = (): unknown => {
    if (outputSchema instanceof z.ZodArray) {
      return parseArrayOutput(output)
    }
    if (outputSchema instanceof z.ZodObject) {
      return parseObjectOutput(output)
    }
    if (outputSchema instanceof z.ZodBoolean) {
      return parseBooleanOutput(output)
    }
    if (outputSchema instanceof z.ZodNumber) {
      return parseNumberOutput(output, outputSchema)
    }
    return output
  }

  const candidate = preParse()

  // TODO: fix typescript issue here with recursive types
  const validation = (outputSchema.safeParse as any)(candidate)
  if (validation.success) {
    return validation.data
  }

  throw new ParseError(validation.error)
}
/**
 * Non-throwing variant of `parseStructuredOutput`.
 *
 * @param output - string to parse
 * @param outputSchema - zod schema describing the expected value
 *
 * @returns `{ success: true, data }` when parsing succeeds, otherwise
 * `{ success: false, error }` where `error` is always a string
 */
export function safeParseStructuredOutput<T>(
  output: string,
  outputSchema: ZodType<T>
): SafeParseResult<T> {
  try {
    const data = parseStructuredOutput<T>(output, outputSchema)
    return {
      success: true,
      data
    }
  } catch (err: unknown) {
    // Anything can be thrown in JavaScript. Reading `.message` blindly would
    // yield `undefined` for non-Error values and would itself throw for
    // `null`/`undefined`, defeating the purpose of a "safe" wrapper.
    return {
      success: false,
      error: err instanceof Error ? err.message : String(err)
    }
  }
}
/**
 * Checks if character at the specified index in a string is escaped.
 *
 * A character is escaped when it is preceded by an odd number of consecutive
 * backslashes: every pair of backslashes is itself an escaped backslash, so
 * only an unpaired one applies to the character at `i`.
 *
 * @param str - string to check
 * @param i - index of the character to check
 * @returns whether the character is escaped
 */
function isEscaped(str: string, i: number): boolean {
  let backslashCount = 0
  for (let j = i - 1; j >= 0 && str[j] === '\\'; j--) {
    backslashCount++
  }

  return backslashCount % 2 === 1
}
/**
 * Extracts JSON objects or arrays from a string.
 *
 * The input is scanned for balanced, top-level `{...}` or `[...]` spans
 * (depending on `jsonStructureType`). Each span is repaired with `jsonrepair`,
 * parsed, and collected if the result is a non-null object or array.
 *
 * Delimiters inside quoted strings are ignored. Single quotes are only
 * treated as string delimiters inside a structure: outside of one they are
 * almost always apostrophes in surrounding prose (e.g. `Here's the list:
 * [1, 2]`), which would otherwise open a "string" that never closes and hide
 * the structure that follows.
 *
 * @param input - string to extract JSON from
 * @param jsonStructureType - type of JSON structure to extract
 * @returns array of extracted JSON objects or arrays
 * @throws ParseError if a closing delimiter has no matching opener, or an
 * opener is never closed
 */
export function extractJSONFromString(
  input: string,
  jsonStructureType: 'object' | 'array'
) {
  const startChar = jsonStructureType === 'object' ? '{' : '['
  const endChar = jsonStructureType === 'object' ? '}' : ']'
  const extractedJSONValues: JsonValue[] = []
  let nestingLevel = 0
  let startIndex = 0
  // Quote character of the string we are currently inside of, if any.
  let openQuote: '"' | "'" | undefined = undefined

  for (let i = 0; i < input.length; i++) {
    const ch = input.charAt(i)

    if (ch === '"' || ch === "'") {
      // Apostrophes in prose surrounding the JSON are not string delimiters.
      const isProseApostrophe = ch === "'" && nestingLevel === 0

      if (!isProseApostrophe && !isEscaped(input, i)) {
        if (openQuote === undefined) {
          openQuote = ch
        } else if (openQuote === ch) {
          openQuote = undefined
        }
        // Otherwise this is the "other" quote char inside a string: ignore.
      }
      continue
    }

    if (openQuote !== undefined) {
      // Structural characters inside a string do not affect nesting.
      continue
    }

    if (ch === startChar) {
      if (nestingLevel === 0) {
        startIndex = i
      }
      nestingLevel += 1
    } else if (ch === endChar) {
      nestingLevel -= 1

      if (nestingLevel === 0) {
        const candidate = input.slice(startIndex, i + 1)
        const parsed = JSON.parse(jsonrepair(candidate))
        if (parsed && typeof parsed === 'object') {
          extractedJSONValues.push(parsed as JsonValue)
        }
      } else if (nestingLevel < 0) {
        throw new ParseError(
          `Invalid JSON string: unexpected ${endChar} at position ${i}`
        )
      }
    }
  }

  if (nestingLevel !== 0) {
    throw new ParseError(
      'Invalid JSON string: unmatched ' + startChar + ' or ' + endChar
    )
  }

  return extractedJSONValues
}
/**
 * Lookup table from normalized (lower-cased) textual spellings to booleans.
 *
 * The table is a null-prototype object so that looking up arbitrary text can
 * never hit an inherited `Object.prototype` member: with a plain object
 * literal, keys such as `"constructor"` or `"tostring"`-like names resolve to
 * functions instead of `undefined`, which callers would mistake for a match.
 */
const BOOLEAN_OUTPUTS: Record<string, boolean> = Object.assign(
  Object.create(null) as Record<string, boolean>,
  {
    true: true,
    false: false,
    t: true,
    f: false,
    yes: true,
    no: false,
    y: true,
    n: false,
    '1': true,
    '0': false
  }
)
/**
 * Finds the first JSON array embedded in a string and returns it.
 *
 * @param output - string to parse
 * @returns the first array found in `output`
 * @throws ParseError if no array is found or the text cannot be repaired /
 * parsed as JSON
 */
export function parseArrayOutput(output: string): Array<any> {
  try {
    const candidates = extractJSONFromString(output, 'array')
    if (candidates.length === 0) {
      throw new ParseError(`Invalid JSON array: ${output}`)
    }

    const firstCandidate = candidates[0]
    if (Array.isArray(firstCandidate)) {
      return firstCandidate
    }

    throw new ParseError(
      `Invalid JSON array: ${JSON.stringify(firstCandidate)}`
    )
  } catch (err: any) {
    // Normalize low-level repair/parse failures into ParseError; everything
    // else (including our own ParseErrors above) propagates untouched.
    if (err instanceof JSONRepairError) {
      throw new ParseError(err.message, { cause: err })
    }

    if (err instanceof SyntaxError) {
      throw new ParseError(`Invalid JSON array: ${err.message}`, { cause: err })
    }

    throw err
  }
}
/**
 * Finds the first JSON object embedded in a string and returns it.
 *
 * @param output - string to parse
 * @returns the first object found in `output`
 * @throws ParseError if no object is found or the text cannot be repaired /
 * parsed as JSON
 */
export function parseObjectOutput(output: string) {
  try {
    const candidates = extractJSONFromString(output, 'object')
    if (candidates.length === 0) {
      throw new ParseError(`Invalid JSON object: ${output}`)
    }

    let firstCandidate = candidates[0]
    if (Array.isArray(firstCandidate)) {
      // TODO
      // An array was extracted: fall back to its first element.
      firstCandidate = firstCandidate[0]
    } else if (typeof firstCandidate !== 'object') {
      throw new ParseError(
        `Invalid JSON object: ${JSON.stringify(firstCandidate)}`
      )
    }

    return firstCandidate
  } catch (err: any) {
    // Normalize low-level repair/parse failures into ParseError; everything
    // else (including our own ParseErrors above) propagates untouched.
    if (err instanceof JSONRepairError) {
      throw new ParseError(err.message, { cause: err })
    }

    if (err instanceof SyntaxError) {
      throw new ParseError(`Invalid JSON object: ${err.message}`, {
        cause: err
      })
    }

    throw err
  }
}
/**
 * Interprets a string as a boolean.
 *
 * The text is lower-cased, trimmed, and stripped of trailing `!`, `.` and `?`
 * characters before being looked up in `BOOLEAN_OUTPUTS`.
 *
 * @param output - string to parse
 * @returns parsed boolean
 * @throws ParseError if the normalized text is not a known boolean spelling
 */
export function parseBooleanOutput(output: string): boolean {
  const lowered = output.toLowerCase()
  const trimmed = lowered.trim()
  const normalized = trimmed.replace(/[!.?]+$/, '')

  const match = BOOLEAN_OUTPUTS[normalized]
  if (match !== undefined) {
    return match
  }

  throw new ParseError(`Invalid boolean output: ${normalized}`)
}
/**
 * Interprets a string as a number.
 *
 * Integer schemas (`outputSchema.isInt`) are parsed with `Number.parseInt`,
 * all others with `Number.parseFloat`; surrounding whitespace is trimmed
 * first.
 *
 * @param output - string to parse
 * @param outputSchema - zod number schema
 * @returns parsed number
 * @throws ParseError if the text does not start with a parsable number
 */
export function parseNumberOutput(
  output: string,
  outputSchema: z.ZodNumber
): number {
  const trimmed = output.trim()

  let value: number
  if (outputSchema.isInt) {
    value = Number.parseInt(trimmed)
  } else {
    value = Number.parseFloat(trimmed)
  }

  if (!Number.isNaN(value)) {
    return value
  }

  throw new ParseError(`Invalid number output: ${trimmed}`)
}

1
src/reset.d.ts vendored 100644
Wyświetl plik

@ -0,0 +1 @@
import '@total-typescript/ts-reset'

23
src/sdks/ai-sdk.ts 100644
Wyświetl plik

@ -0,0 +1,23 @@
import { tool } from 'ai'
import type { AIFunctionLike } from '../types.js'
import { AIFunctionSet } from '../ai-function-set.js'
/**
 * Adapts Agentic stdlib AI functions to the shape expected by the Vercel AI
 * SDK's `tools` parameter: an object keyed by function name whose values are
 * built with the SDK's `tool()` helper.
 */
export function createAISDKTools(...aiFunctionLikeTools: AIFunctionLike[]) {
  const functionSet = new AIFunctionSet(aiFunctionLikeTools)

  return Object.fromEntries(
    functionSet.map((aiFn) => {
      const { name, description } = aiFn.spec

      return [
        name,
        tool({
          description,
          parameters: aiFn.inputSchema,
          execute: aiFn.impl
        })
      ]
    })
  )
}

25
src/sdks/dexter.ts 100644
Wyświetl plik

@ -0,0 +1,25 @@
import { createAIFunction } from '@dexaai/dexter'
import type { AIFunctionLike } from '../types.js'
import { AIFunctionSet } from '../ai-function-set.js'
/**
 * Adapts Agentic stdlib AI functions to Dexter: each function is wrapped with
 * Dexter's `createAIFunction`, reusing its name, description and input schema.
 */
export function createDexterFunctions(
  ...aiFunctionLikeTools: AIFunctionLike[]
) {
  const functionSet = new AIFunctionSet(aiFunctionLikeTools)

  return functionSet.map((aiFn) => {
    const { name, description } = aiFn.spec

    return createAIFunction(
      { name, description, argsSchema: aiFn.inputSchema },
      aiFn.impl
    )
  })
}

25
src/sdks/genkit.ts 100644
Wyświetl plik

@ -0,0 +1,25 @@
import { defineTool } from '@genkit-ai/ai'
import { z } from 'zod'
import type { AIFunctionLike } from '../types.js'
import { AIFunctionSet } from '../ai-function-set.js'
/**
 * Adapts Agentic stdlib AI functions to Genkit: each function is registered
 * through `defineTool` with its own input schema and an unconstrained
 * (`z.any()`) output schema.
 */
export function createGenkitTools(...aiFunctionLikeTools: AIFunctionLike[]) {
  const functionSet = new AIFunctionSet(aiFunctionLikeTools)

  return functionSet.map((aiFn) => {
    const { name, description } = aiFn.spec

    return defineTool(
      {
        name,
        description,
        inputSchema: aiFn.inputSchema,
        outputSchema: z.any()
      },
      aiFn.impl
    )
  })
}

Wyświetl plik

@ -0,0 +1,27 @@
import { DynamicStructuredTool } from '@langchain/core/tools'
import type { AIFunctionLike } from '../types.js'
import { AIFunctionSet } from '../ai-function-set.js'
import { stringifyForModel } from '../utils.js'
/**
 * Adapts Agentic stdlib AI functions to LangChain: each function becomes a
 * `DynamicStructuredTool` whose result is serialized to a string.
 */
export function createLangChainTools(...aiFunctionLikeTools: AIFunctionLike[]) {
  const functionSet = new AIFunctionSet(aiFunctionLikeTools)

  return functionSet.map((aiFn) => {
    const { name, description } = aiFn.spec

    return new DynamicStructuredTool({
      name,
      description,
      schema: aiFn.inputSchema,
      func: async (input) => {
        const rawResult = await Promise.resolve(aiFn.impl(input))

        // LangChain tools must hand back a string.
        return stringifyForModel(rawResult)
      }
    })
  })
}

Wyświetl plik

@ -0,0 +1,22 @@
import { FunctionTool } from 'llamaindex'
import type { AIFunctionLike } from '../types.js'
import { AIFunctionSet } from '../ai-function-set.js'
/**
 * Adapts Agentic stdlib AI functions to LlamaIndex: each function is wrapped
 * via `FunctionTool.from`, passing along the name, description and parameter
 * spec from the function's `spec`.
 */
export function createLlamaIndexTools(
  ...aiFunctionLikeTools: AIFunctionLike[]
) {
  const functionSet = new AIFunctionSet(aiFunctionLikeTools)

  return functionSet.map((aiFn) => {
    const { name, description, parameters } = aiFn.spec

    return FunctionTool.from(aiFn.impl, {
      name,
      description,
      parameters: parameters as any
    })
  })
}

Wyświetl plik

@ -0,0 +1,310 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, omit } from '../utils.js'
/**
 * Types and constants for the Bing web search client.
 *
 * Only `API_BASE_URL`, `SearchQuery` and `SearchResponse` are exported; the
 * remaining interfaces are internal building blocks of `SearchResponse`.
 */
export namespace bing {
export const API_BASE_URL = 'https://api.bing.microsoft.com'
/** Query parameters sent with a search request; only `q` is required. */
export interface SearchQuery {
q: string
mkt?: string
offset?: number
count?: number
safeSearch?: 'Off' | 'Moderate' | 'Strict'
textDecorations?: boolean
textFormat?: 'Raw' | 'HTML'
}
/** Typed shape of the JSON body returned by the search endpoint. */
export interface SearchResponse {
_type: string
entities: Entities
images: Images
places: Places
queryContext: QueryContext
rankingResponse: RankingResponse
relatedSearches: RelatedSearches
videos: Videos
webPages: WebPages
}
interface Entities {
value: EntitiesValue[]
}
interface EntitiesValue {
bingId: string
contractualRules: PurpleContractualRule[]
description: string
entityPresentationInfo: EntityPresentationInfo
id: string
image: Image
name: string
webSearchUrl: string
}
// Contractual rule attached to entity results (see EntitiesValue).
interface PurpleContractualRule {
_type: string
license?: DeepLink
licenseNotice?: string
mustBeCloseToContent: boolean
targetPropertyName: string
text?: string
url?: string
}
interface DeepLink {
name: string
url: string
}
interface EntityPresentationInfo {
entityScenario: string
entityTypeHints: string[]
}
interface Image {
height: number
hostPageUrl: string
name: string
provider: Provider[]
sourceHeight: number
sourceWidth: number
thumbnailUrl: string
width: number
}
interface Provider {
_type: string
url: string
}
interface Images {
id: string
isFamilyFriendly: boolean
readLink: string
value: ImagesValue[]
webSearchUrl: string
}
interface ImagesValue {
contentSize: string
contentUrl: string
encodingFormat: string
height: number
hostPageDisplayUrl: string
hostPageUrl: string
name: string
thumbnail: Thumbnail
thumbnailUrl: string
webSearchUrl: string
width: number
}
interface Thumbnail {
height: number
width: number
}
interface Places {
value: PlacesValue[]
}
interface PlacesValue {
_type: string
address: Address
entityPresentationInfo: EntityPresentationInfo
id: string
name: string
telephone: string
url: string
webSearchUrl: string
}
interface Address {
addressCountry: string
addressLocality: string
addressRegion: string
neighborhood: string
postalCode: string
}
interface QueryContext {
askUserForLocation: boolean
originalQuery: string
}
interface RankingResponse {
mainline: Mainline
sidebar: Mainline
}
interface Mainline {
items: Item[]
}
interface Item {
answerType: string
resultIndex?: number
value?: ItemValue
}
interface ItemValue {
id: string
}
interface RelatedSearches {
id: string
value: RelatedSearchesValue[]
}
interface RelatedSearchesValue {
displayText: string
text: string
webSearchUrl: string
}
interface Videos {
id: string
isFamilyFriendly: boolean
readLink: string
scenario: string
value: VideosValue[]
webSearchUrl: string
}
interface VideosValue {
allowHttpsEmbed: boolean
allowMobileEmbed: boolean
contentUrl: string
creator: Creator
// NOTE(review): the response is read via `.json()`, which presumably leaves
// dates as strings rather than `Date` instances - confirm before relying on
// Date methods here.
datePublished: Date
description: string
duration: string
embedHtml: string
encodingFormat: EncodingFormat
height: number
hostPageDisplayUrl: string
hostPageUrl: string
isAccessibleForFree: boolean
isSuperfresh: boolean
motionThumbnailUrl: string
name: string
publisher: Creator[]
thumbnail: Thumbnail
thumbnailUrl: string
viewCount: number
webSearchUrl: string
width: number
}
interface Creator {
name: string
}
enum EncodingFormat {
Mp4 = 'mp4'
}
interface WebPages {
totalEstimatedMatches: number
value: WebPagesValue[]
webSearchUrl: string
}
interface WebPagesValue {
// NOTE(review): same caveat as `datePublished` above - presumably a string at
// runtime; confirm.
dateLastCrawled: Date
deepLinks?: DeepLink[]
displayUrl: string
id: string
isFamilyFriendly: boolean
isNavigational: boolean
language: string
name: string
snippet: string
thumbnailUrl?: string
url: string
contractualRules?: FluffyContractualRule[]
}
// Contractual rule attached to web page results (see WebPagesValue).
interface FluffyContractualRule {
_type: string
license: DeepLink
licenseNotice: string
mustBeCloseToContent: boolean
targetPropertyIndex: number
targetPropertyName: string
}
}
/**
 * Client for the Bing web search API.
 *
 * Exposes a single `bing_web_search` AI function backed by the `v7.0/search`
 * endpoint.
 *
 * @see https://www.microsoft.com/en-us/bing/apis/bing-web-search-api
 */
export class BingClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - subscription key; defaults to the `BING_API_KEY`
   * environment variable
   * @param opts.apiBaseUrl - base URL all requests are resolved against
   * @param opts.ky - `ky` instance to extend
   */
  constructor({
    apiKey = getEnv('BING_API_KEY'),
    apiBaseUrl = bing.API_BASE_URL,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'BingClient missing required "apiKey" (defaults to "BING_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl
    this.ky = ky.extend({ prefixUrl: this.apiBaseUrl })
  }

  /**
   * Runs a web search.
   *
   * @param queryOrOpts - either the raw query text or a full set of query
   * options; `mkt` defaults to `en-US` unless the options override it
   * @returns the search response without its `rankingResponse` member
   */
  @aiFunction({
    name: 'bing_web_search',
    description:
      'Searches the web using the Bing search engine to return the most relevant web pages for a given query. Can also be used to find up-to-date news and information about many topics.',
    inputSchema: z.object({
      q: z.string().describe('search query')
    })
  })
  async search(queryOrOpts: string | bing.SearchQuery) {
    const query: bing.SearchQuery =
      typeof queryOrOpts === 'string' ? { q: queryOrOpts } : queryOrOpts

    // Defaults first so that caller-supplied options win.
    const searchParams = { mkt: 'en-US', ...query }

    const response = await this.ky
      .get('v7.0/search', {
        headers: { 'Ocp-Apim-Subscription-Key': this.apiKey },
        searchParams
      })
      .json<bing.SearchResponse>()

    return omit(response, 'rankingResponse')
  }
}

Wyświetl plik

@ -0,0 +1,676 @@
import defaultKy from 'ky'
import pThrottle from 'p-throttle'
import type { DeepNullable, KyInstance } from '../types.js'
import { assert, delay, getEnv, throttleKy } from '../utils.js'
/**
 * Types and shared configuration for `ClearbitClient`.
 */
export namespace clearbit {
// Allow up to 600 requests per minute by default.
export const throttle = pThrottle({
limit: 600,
interval: 60 * 1000
})
/** Query options accepted by `ClearbitClient.companyEnrichment`. */
export interface CompanyEnrichmentOptions {
domain: string
webhook_url?: string
company_name?: string
linkedin?: string
twitter?: string
facebook?: string
}
/**
 * Company attributes; wrapped in `DeepNullable` to form `CompanyResponse`.
 */
export type CompanyNullableProps = {
name: string
legalName: string
domain: string
domainAliases: string[]
site: {
phoneNumbers: string[]
emailAddresses: string[]
}
category: {
sector: string
industryGroup: string
industry: string
subIndustry: string
gicsCode: string
sicCode: string
sic4Codes: string[]
naicsCode: string
naics6Codes: string[]
naics6Codes2022: string[]
}
tags: string[]
description: string
foundedYear: number
location: string
timeZone: string
utcOffset: number
geo: {
streetNumber: string
streetName: string
subPremise: string
streetAddress: string
city: string
postalCode: string
state: string
stateCode: string
country: string
countryCode: string
lat: number
lng: number
}
logo: string
facebook: {
handle: string
likes: number
}
linkedin: {
handle: string
}
twitter: {
handle: string
id: string
bio: string
followers: number
following: number
location: string
site: string
avatar: string
}
crunchbase: {
handle: string
}
emailProvider: boolean
type: string
ticker: string
identifiers: {
usEIN: string
usCIK: string
}
phone: string
metrics: {
alexaUsRank: number
alexaGlobalRank: number
trafficRank: string
employees: number
employeesRange: string
marketCap: string
raised: number
annualRevenue: string
estimatedAnnualRevenue: string
fiscalYearEnd: string
}
indexedAt: string
tech: string[]
techCategories: string[]
parent: {
domain: string
}
ultimateParent: {
domain: string
}
}
/** Response type of `ClearbitClient.emailLookup`. */
export type EmailLookupResponse = DeepNullable<{
id: string
name: {
fullName: string
givenName: string
familyName: string
}
email: string
location: string
timeZone: string
utcOffset: number
geo: {
city: string
state: string
stateCode: string
country: string
countryCode: string
lat: number
lng: number
}
bio: string
site: string
avatar: string
employment: {
domain: string
name: string
title: string
role: string
subRole: string
seniority: string
}
facebook: {
handle: string
}
github: {
handle: string
id: string
avatar: string
company: string
blog: string
followers: number
following: number
}
twitter: {
handle: string
id: string
bio: string
followers: number
following: number
statuses: number
favorites: number
location: string
site: string
avatar: string
}
linkedin: {
handle: string
}
googleplus: {
handle: null
}
gravatar: {
handle: string
urls: {
value: string
title: string
}[]
avatar: string
avatars: {
url: string
type: string
}[]
}
fuzzy: boolean
emailProvider: boolean
indexedAt: string
phone: string
activeAt: string
inactiveAt: string
}>
/** A company record: a required `id` plus deeply-nullable attributes. */
export type CompanyResponse = {
id: string
} & DeepNullable<CompanyNullableProps>
/** Query options accepted by `ClearbitClient.companySearch`. */
export interface CompanySearchOptions {
/**
* See clearbit docs: https://dashboard.clearbit.com/docs?shell#discovery-api-tech-queries
* Examples:
* tech:google_apps
* or:(twitter_followers:10000~ type:nonprofit)
*/
query: string
page?: number
page_size?: number
limit?: number
sort?: string
}
/** Response type of `ClearbitClient.companySearch`. */
export interface CompanySearchResponse {
total: number
page: number
results: CompanyResponse[]
}
/** Minimal company record used by the autocomplete and name-to-domain calls. */
export interface BasicCompanyResponse {
domain: string
logo: string
name: string
}
/** Query options accepted by `ClearbitClient.prospectorPeopleV2`. */
export interface PeopleSearchOptionsV2 {
domains?: string[]
names?: string[]
roles?: string[]
seniorities?: string[]
titles?: string[]
locations?: string[]
employees_ranges?: string[]
company_tags?: string[]
company_tech?: string[]
company_types?: string[]
industries?: string[]
revenue_ranges?: string[]
linkedin_profile_handles?: string[]
page?: number
page_size?: number
suppression?: string
}
// Prospector types
/** Response type of `ClearbitClient.prospectorPeopleV2`. */
export interface ProspectorResponseV2 {
page: number
page_size: number
total: number
results: PersonAttributesV2[]
}
export interface EmploymentAttributes {
company: string
domain: string
linkedin: string
title: string
role: string
subRole: string
seniority: string
startDate: string
endDate: string
present: boolean
highlight: boolean
}
export interface EmailAttributes {
address: string
type: string
}
export interface PhoneAttributes {
number: string
type: string
}
interface Name {
givenName: string
familyName: string
fullName: string
}
/** A person record: a required `id` plus deeply-nullable attributes. */
export type PersonAttributesV2 = {
id: string
} & DeepNullable<{
name: Name
avatar: string
location: string
linkedin: string
employments: EmploymentAttributes[]
emails: EmailAttributes[]
phones: PhoneAttributes[]
}>
/** Query options accepted by `ClearbitClient.prospectorPeopleV1`. */
export type PeopleSearchOptionsV1 = {
domain: string
role?: string
roles?: string[]
seniority?: string
seniorities?: string[]
title?: string
titles?: string[]
city?: string
cities?: string[]
state?: string
states?: string[]
country?: string
countries?: string[]
name?: string
query?: string
page?: number
page_size?: number
suppression?: string
email?: boolean
}
export interface Company {
name: string
}
/** A single person entry inside `ProspectorResponseV1.results`. */
export interface PeopleSearchResponseV1 {
id: string
name: Name
title: string
role: string
subRole: string
seniority: string
company: Company
email: string
verified: boolean
phone: string
}
/** Response type of `ClearbitClient.prospectorPeopleV1`. */
export interface ProspectorResponseV1 {
page: number
page_size: number
total: number
results: PeopleSearchResponseV1[]
}
export interface GeoIP {
city: string
state: string
stateCode: string
country: string
countryCode: string
}
/** Response type of `ClearbitClient.revealCompanyFromIP`. */
export interface CompanyRevealResponse {
ip: string
fuzzy: boolean
domain: string
type: string
company?: CompanyResponse
geoIP: GeoIP
confidenceScore: 'very_high' | 'high' | 'medium' | 'low'
role: string
seniority: string
}
}
/**
* The Clearbit API helps with resolving and enriching people and company data.
*
* @see https://dashboard.clearbit.com/docs
*/
export class ClearbitClient {
// HTTP client used for all requests; configured in the constructor.
protected readonly ky: KyInstance
protected readonly apiKey: string
// Upper bound applied to `page_size` by the prospector methods.
protected readonly _maxPageSize = 100
// Role identifiers; presumably the values accepted by the people-search
// `role`/`roles` filters - TODO confirm against the Clearbit docs.
static readonly PersonRoles = [
'communications',
'customer_service',
'education',
'engineering',
'finance',
'health_professional',
'human_resources',
'information_technology',
'leadership',
'legal',
'marketing',
'operations',
'product',
'public_relations',
'real_estate',
'recruiting',
'research',
'sales'
]
// Seniority labels; presumably for the V2 prospector API - TODO confirm.
static readonly SenioritiesV2 = [
'Executive',
'VP',
'Owner',
'Partner',
'Director',
'Manager',
'Senior',
'Entry'
]
// Lower-case seniority identifiers; presumably for the V1 prospector API - TODO confirm.
static readonly Seniorities = ['executive', 'director', 'manager']
// Sub-industry labels; presumably the values Clearbit uses for a company's
// `category.subIndustry` - TODO confirm against the Clearbit docs.
static readonly SubIndustries = [
'Automotive',
'Consumer Discretionary',
'Consumer Goods',
'Consumer Electronics',
'Household Appliances',
'Photography',
'Sporting Goods',
'Apparel, Accessories & Luxury Goods',
'Textiles',
'Textiles, Apparel & Luxury Goods',
'Consumer Services',
'Education Services',
'Specialized Consumer Services',
'Casinos & Gaming',
'Hotels, Restaurants & Leisure',
'Leisure Facilities',
'Restaurants',
'Education',
'Family Services',
'Legal Services',
'Advertising',
'Broadcasting',
'Media',
'Movies & Entertainment',
'Public Relations',
'Publishing',
'Distributors',
'Retailing',
'Home Improvement Retail',
'Homefurnishing Retail',
'Specialty Retail',
'Consumer Staples',
'Food Retail',
'Beverages',
'Agricultural Products',
'Food',
'Food Production',
'Packaged Foods & Meats',
'Tobacco',
'Cosmetics',
'Oil & Gas',
'Banking & Mortgages',
'Accounting',
'Finance',
'Financial Services',
'Asset Management & Custody Banks',
'Diversified Capital Markets',
'Fundraising',
'Investment Banking & Brokerage',
'Payments',
'Insurance',
'Real Estate',
'Eyewear',
'Health & Wellness',
'Health Care',
'Health Care Services',
'Biotechnology',
'Life Sciences Tools & Services',
'Pharmaceuticals',
'Aerospace & Defense',
'Capital Goods',
'Civil Engineering',
'Construction',
'Construction & Engineering',
'Mechanical Engineering',
'Electrical',
'Electrical Equipment',
'Industrials & Manufacturing',
'Industrial Machinery',
'Machinery',
'Trading Companies & Distributors',
'Business Supplies',
'Commercial Printing',
'Corporate & Business',
'Architecture',
'Automation',
'Consulting',
'Design',
'Human Resource & Employment Services',
'Professional Services',
'Research & Consulting Services',
'Industrials',
'Shipping & Logistics',
'Airlines',
'Marine',
'Ground Transportation',
'Transportation',
'Semiconductors',
'Cloud Services',
'Internet',
'Internet Software & Services',
'Data Processing & Outsourced Services',
'Graphic Design',
'Communications',
'Computer Networking',
'Nanotechnology',
'Computer Hardware',
'Technology Hardware, Storage & Peripherals',
'Building Materials',
'Chemicals',
'Commodity Chemicals',
'Containers & Packaging',
'Gold',
'Metals & Mining',
'Paper Products',
'Integrated Telecommunication Services',
'Wireless Telecommunication Services',
'Renewable Energy',
'Energy',
'Utilities'
]
/**
 * @param opts.apiKey - API key; defaults to the `CLEARBIT_API_KEY`
 * environment variable
 * @param opts.timeoutMs - per-request timeout in milliseconds
 * @param opts.throttle - whether to rate-limit requests with
 * `clearbit.throttle`
 * @param opts.ky - `ky` instance to extend
 */
constructor({
  apiKey = getEnv('CLEARBIT_API_KEY'),
  timeoutMs = 30_000,
  throttle = true,
  ky = defaultKy
}: {
  apiKey?: string
  timeoutMs?: number
  throttle?: boolean
  ky?: KyInstance
} = {}) {
  assert(
    apiKey,
    'ClearbitClient missing required "apiKey" (defaults to "CLEARBIT_API_KEY")'
  )
  this.apiKey = apiKey

  let baseKy = ky
  if (throttle) {
    baseKy = throttleKy(ky, clearbit.throttle)
  }

  // Every request carries the API key as a bearer token.
  const authHeaders = { Authorization: `Bearer ${apiKey}` }
  this.ky = baseKy.extend({ timeout: timeoutMs, headers: authHeaders })
}
/** Looks up a company record, passing `options` as query parameters. */
async companyEnrichment(options: clearbit.CompanyEnrichmentOptions) {
  const endpoint = 'https://company-stream.clearbit.com/v2/companies/find'
  const request = this.ky.get(endpoint, { searchParams: { ...options } })

  return request.json<clearbit.CompanyResponse>()
}
/** Searches for companies, passing `options` as query parameters. */
async companySearch(options: clearbit.CompanySearchOptions) {
  const endpoint = 'https://discovery.clearbit.com/v1/companies/search'
  const request = this.ky.get(endpoint, { searchParams: { ...options } })

  return request.json<clearbit.CompanySearchResponse>()
}
/** Returns company suggestions for `name`, sent as the `query` parameter. */
async companyAutocomplete(name: string) {
  const endpoint = 'https://autocomplete.clearbit.com/v1/companies/suggest'
  const request = this.ky.get(endpoint, { searchParams: { query: name } })

  return request.json<clearbit.BasicCompanyResponse[]>()
}
/**
 * Searches for people via the V2 prospector endpoint.
 *
 * `page_size` is capped at `_maxPageSize`; a missing or zero value falls back
 * to that maximum.
 */
async prospectorPeopleV2(options: clearbit.PeopleSearchOptionsV2) {
  const endpoint = 'https://prospector.clearbit.com/v2/people/search'
  const requestedPageSize = options.page_size || this._maxPageSize
  const pageSize = Math.min(this._maxPageSize, requestedPageSize)

  return this.ky
    .get(endpoint, {
      // @ts-expect-error string[] option values are not accepted by the searchParams type
      searchParams: { ...options, page_size: pageSize }
    })
    .json<clearbit.ProspectorResponseV2>()
}
/**
 * Searches for people via the V1 prospector endpoint.
 *
 * `email` defaults to `false` unless `options` overrides it. `page_size` is
 * capped at `_maxPageSize`; a missing or zero value falls back to that
 * maximum.
 */
async prospectorPeopleV1(options: clearbit.PeopleSearchOptionsV1) {
  const endpoint = 'https://prospector.clearbit.com/v1/people/search'
  const requestedPageSize = options.page_size || this._maxPageSize
  const pageSize = Math.min(this._maxPageSize, requestedPageSize)

  return this.ky
    .get(endpoint, {
      // @ts-expect-error string[] option values are not accepted by the searchParams type
      searchParams: { email: false, ...options, page_size: pageSize }
    })
    .json<clearbit.ProspectorResponseV1>()
}
// TODO Status code = 202 means the response was queued.
// Implement webhook when needed. The polling works well, in most cases we need
// to try again once to get a 200 response.
/**
 * Looks up a person by email address.
 *
 * A 202 response means the lookup was queued, so the request is retried (with
 * a one second pause before each retry) up to `maxRetries` times.
 *
 * @param opts.email - email address to look up
 * @param opts.maxRetries - maximum number of retries after a 202 response
 * @returns the parsed person record
 * @throws Error if the lookup is still queued (202) once all retries are used
 * up; the last response is attached as `cause`
 */
async emailLookup({
  email,
  maxRetries = 2
}: {
  email: string
  maxRetries?: number
}): Promise<clearbit.EmailLookupResponse> {
  const url = 'https://person.clearbit.com/v2/people/find'
  const lookup = () => this.ky.get(url, { searchParams: { email } })

  let response = await lookup()

  for (
    let attempt = 1;
    response.status === 202 && attempt <= maxRetries;
    attempt++
  ) {
    console.log(`Email Lookup was queued, retry ${attempt}.`)
    await delay(1000)
    response = await lookup()
  }

  // Still queued after exhausting the retries: the body is not a person
  // record, so surface the failure instead of returning it as one.
  if (response.status === 202) {
    throw new Error('clearbit email lookup error 202', { cause: response })
  }

  return response.json<clearbit.EmailLookupResponse>()
}
/**
 * Resolves a company name to a basic company record.
 *
 * Best-effort: any request or parsing failure yields `undefined`.
 */
async nameToDomain(name: string) {
  try {
    return await this.ky
      .get('https://company.clearbit.com/v1/domains/find', {
        searchParams: { name }
      })
      .json<clearbit.BasicCompanyResponse>()
  } catch {
    return undefined
  }
}
/**
 * Looks up the company associated with an IP address.
 *
 * Best-effort: any request or parsing failure yields `undefined`.
 */
async revealCompanyFromIP(ip: string) {
  try {
    return await this.ky
      .get('https://reveal.clearbit.com/v1/companies/find', {
        searchParams: { ip }
      })
      .json<clearbit.CompanyRevealResponse>()
  } catch {
    return undefined
  }
}
/**
 * Tells whether any current employment (one without an `endDate`) is at a
 * company whose name contains `companyName`, compared case-insensitively.
 *
 * @param companyName - company name (or fragment) to look for
 * @param employments - employment entries to inspect; may be `null` or empty
 * @returns `true` if a matching current employment exists, else `false`
 */
static filterEmploymentProspectorV2(
  companyName: string,
  employments: Array<DeepNullable<clearbit.EmploymentAttributes> | null> | null
) {
  if (!employments || employments.length === 0) {
    return false
  }

  // People can hold several jobs at once, so only entries without an end
  // date count as "currently works there".
  return employments.some(
    (employment) =>
      !employment?.endDate &&
      employment?.company?.toLowerCase().includes(companyName.toLowerCase())
  )
}
}

Wyświetl plik

@ -0,0 +1,65 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { Msg } from '../message.js'
import { assert, getEnv } from '../utils.js'
export namespace dexa {
export const AskDexaOptionsSchema = z.object({
question: z.string().describe('The question to ask Dexa.')
})
export type AskDexaOptions = z.infer<typeof AskDexaOptionsSchema>
}
/**
 * Client for Dexa, which answers questions using knowledge from podcasters
 * and other experts.
 *
 * @note The Dexa API is not yet publicly available.
 * @see https://dexa.ai
 */
export class DexaClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - API secret; defaults to the `DEXA_API_KEY`
   * environment variable
   * @param opts.apiBaseUrl - base URL; defaults to `DEXA_API_BASE_URL` or
   * `https://dexa.ai`
   * @param opts.timeoutMs - per-request timeout in milliseconds
   * @param opts.ky - `ky` instance to extend
   */
  constructor({
    apiKey = getEnv('DEXA_API_KEY'),
    apiBaseUrl = getEnv('DEXA_API_BASE_URL') ?? 'https://dexa.ai',
    timeoutMs = 60_000,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    timeoutMs?: number
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'DexaClient missing required "apiKey" (defaults to "DEXA_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl
    this.ky = ky.extend({ prefixUrl: this.apiBaseUrl, timeout: timeoutMs })
  }

  /**
   * Sends a question to Dexa and returns the answer text.
   *
   * The question is posted as a single user message together with the API
   * secret.
   */
  @aiFunction({
    name: 'ask_dexa',
    description:
      'Answers questions based on knowledge of trusted experts and podcasters. Example experts include: Andrew Huberman, Tim Ferriss, Lex Fridman, Peter Attia, Seth Godin, Rhonda Patrick, Rick Rubin, and more.',
    inputSchema: dexa.AskDexaOptionsSchema
  })
  async askDexa(opts: dexa.AskDexaOptions) {
    const payload = {
      secret: this.apiKey,
      messages: [Msg.user(opts.question)]
    }

    return this.ky.post('api/ask-dexa', { json: payload }).json<string>()
  }
}

Wyświetl plik

@ -0,0 +1,810 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import {
assert,
getEnv,
omit,
sanitizeSearchParams,
throttleKy
} from '../utils.js'
export namespace diffbot {
/** Default base URL for the extraction endpoints (`DiffbotClient`'s `apiBaseUrl`). */
export const API_BASE_URL = 'https://api.diffbot.com'
/** Default base URL for the Knowledge Graph endpoints (`DiffbotClient`'s `apiKnowledgeGraphBaseUrl`). */
export const KNOWLEDGE_GRAPH_API_BASE_URL = 'https://kg.diffbot.com'
// Allow up to 5 requests per second by default.
// https://docs.diffbot.com/reference/rate-limits
// This throttle is shared: `DiffbotClient` wraps its `ky` instance with it
// whenever the constructor's `throttle` option is left enabled.
export const throttle = pThrottle({
limit: 5,
interval: 1000,
strict: true
})
export interface ExtractOptions {
/** Specify optional fields to be returned from any fully-extracted pages, e.g.: &fields=querystring,links. See available fields within each API's individual documentation pages.
* @see https://docs.diffbot.com/reference/extract-optional-fields
*/
fields?: string[]
/** (*Undocumented*) Pass paging=false to disable automatic concatenation of multiple-page articles. (By default, Diffbot will concatenate up to 20 pages of a single article.) */
paging?: boolean
/** Pass discussion=false to disable automatic extraction of comments or reviews from pages identified as articles or products. This will not affect pages identified as discussions. */
discussion?: boolean
/** Sets a value in milliseconds to wait for the retrieval/fetch of content from the requested URL. The default timeout for the third-party response is 30 seconds (30000). */
timeout?: number
/** Used to specify the IP address of a custom proxy that will be used to fetch the target page, instead of Diffbot's default IPs/proxies. (Ex: &proxy=168.212.226.204) */
proxy?: string
/** Used to specify the authentication parameters that will be used with the proxy specified in the &proxy parameter. (Ex: &proxyAuth=username:password) */
proxyAuth?: string
/** `none` will instruct Extract to not use proxies, even if proxies have been enabled for this particular URL globally. */
useProxy?: string
/** @see https://docs.diffbot.com/reference/extract-custom-javascript */
customJs?: string
/** @see https://docs.diffbot.com/reference/extract-custom-headers */
customHeaders?: Record<string, string>
}
export interface ExtractAnalyzeOptions extends ExtractOptions {
/** URL of the web page to process */
url: string
/** By default the Analyze API will fully extract all pages that match an existing Automatic API -- articles, products or image pages. Set mode to a specific page-type (e.g., mode=article) to extract content only from that specific page-type. All other pages will simply return the default Analyze fields. */
mode?: string
/** Force any non-extracted pages (those with a type of "other") through a specific API. For example, to route all "other" pages through the Article API, pass &fallback=article. Pages that utilize this functionality will return a fallbackType field at the top-level of the response and a originalType field within each extracted object, both of which will indicate the fallback API used. */
fallback?: string
}
export interface ExtractArticleOptions extends ExtractOptions {
/** URL of the web page to process */
url: string
/** Set the maximum number of automatically-generated tags to return. By default a maximum of ten tags will be returned. */
maxTags?: number
/** Set the minimum relevance score of tags to return, between 0.0 and 1.0. By default only tags with a score equal to or above 0.5 will be returned. */
tagConfidence?: number
/** Used to request the output of the Diffbot Natural Language API in the field naturalLanguage. Example: &naturalLanguage=entities,facts,categories,sentiment. */
naturalLanguage?: string[]
}
export interface ExtractResponse {
request: DiffbotRequest
objects: DiffbotObject[]
}
export type ExtractArticleResponse = ExtractResponse
export interface ExtractAnalyzeResponse extends ExtractResponse {
type: string
title: string
humanLanguage: string
}
export interface DiffbotObject {
date: string
sentiment: number
images: Image[]
author: string
estimatedDate: string
publisherRegion: string
icon: string
diffbotUri: string
siteName: string
type: string
title: string
tags: Tag[]
publisherCountry: string
humanLanguage: string
authorUrl: string
pageUrl: string
html: string
text: string
categories?: ObjectCategory[]
authors: Author[]
breadcrumb?: Breadcrumb[]
items?: ListItem[]
meta?: any
}
export interface ListItem {
title: string
link: string
summary: string
image?: string
}
export interface Author {
name: string
link: string
}
export interface ObjectCategory {
score: number
name: string
id: string
}
export interface Breadcrumb {
link: string
name: string
}
export interface Image {
url: string
diffbotUri: string
naturalWidth: number
naturalHeight: number
width: number
height: number
isCached?: boolean
primary?: boolean
}
export interface Tag {
score: number
sentiment: number
count: number
label: string
uri: string
rdfTypes: string[]
}
export interface DiffbotRequest {
pageUrl: string
api: string
version: number
}
export interface KnowledgeGraphSearchOptions {
type?: 'query' | 'text' | 'queryTextFallback' | 'crawl'
query: string
col?: string
from?: number
size?: number
// NOTE: we only support `json`, so these options are not needed
// We can always convert from json to another format if needed.
// format?: 'json' | 'jsonl' | 'csv' | 'xls' | 'xlsx'
// exportspec?: string
// exportseparator?: string
// exportfile?: string
filter?: string
jsonmode?: 'extended' | 'id'
nonCanonicalFacts?: boolean
noDedupArticles?: boolean
cluster?: 'all' | 'best' | 'dedupe'
report?: boolean
}
export interface KnowledgeGraphEnhanceOptions {
type: EntityType
id?: string
name?: string
url?: string
phone?: string
email?: string
employer?: string
title?: string
school?: string
location?: string
ip?: string
customId?: string
size?: number
threshold?: number
refresh?: boolean
search?: boolean
useCache?: boolean
filter?: string
jsonmode?: 'extended' | 'id'
nonCanonicalFacts?: boolean
}
export interface KnowledgeGraphResponse {
data: KnowledgeGraphNode[]
version: number
hits: number
results: number
kgversion: string
diffbot_type: string
facet?: boolean
errors?: any[]
}
export interface KnowledgeGraphNode {
score: number
esscore?: number
entity: KnowledgeGraphEntity
entity_ctx: any
errors: string[]
callbackQuery: string
upperBound: number
lowerBound: number
count: number
value: string
uri: string
}
export interface KnowledgeGraphEntity {
id: string
diffbotUri: string
type?: string
name: string
images: Image[]
origins: string[]
nbOrigins?: number
gender?: Gender
githubUri?: string
importance?: number
description?: string
homepageUri?: string
allNames?: string[]
skills?: Skill[]
crawlTimestamp?: number
summary?: string
image?: string
types?: string[]
nbIncomingEdges?: number
allUris?: string[]
employments?: Employment[]
locations?: Location[]
location?: Location
allOriginHashes?: string[]
nameDetail?: NameDetail
}
/**
 * Entity types used by the Knowledge Graph option and entity types in this
 * namespace.
 *
 * `'Person'` is part of the union because `EnhanceEntityOptionsSchema`
 * accepts it as a `type`, and `Entity` (whose `type` is an `EntityType`)
 * carries person-specific fields such as `educations` and `birthDate`.
 */
export type EntityType = 'Person' | 'Organization' | 'Place'
export const EnhanceEntityOptionsSchema = z.object({
type: z.enum(['Person', 'Organization']),
id: z
.string()
.optional()
.describe('Diffbot ID of the entity to enhance if known'),
name: z
.union([z.string(), z.array(z.string())])
.optional()
.describe('Name of the entity'),
url: z
.array(z.string())
.optional()
.describe('Origin or homepage URL of the entity'),
phone: z.string().optional().describe('Phone number of the entity'),
email: z.string().optional().describe('Email of the entity'),
employer: z
.string()
.optional()
.describe("Name of the entity's employer (for Person entities)"),
title: z
.string()
.optional()
.describe('Title of the entity (for Person entities)'),
school: z
.string()
.optional()
.describe('School of the entity (for Person entities)'),
location: z.string().optional().describe('Location of the entity'),
ip: z.string().optional().describe('IP address of the entity'),
customId: z.string().optional().describe('User-defined ID for correlation'),
threshold: z.number().optional().describe('Similarity threshold'),
refresh: z
.boolean()
.optional()
.describe(
'If set, will attempt to refresh the entity data by recrawling the source URLs.'
),
search: z
.boolean()
.optional()
.describe(
'If set, will attempt to search the web for the entity and merge the results into its knowledge base.'
),
size: z
.number()
.int()
.positive()
.max(100)
.optional()
.describe('Number of results to return')
})
export type EnhanceEntityOptions = z.infer<typeof EnhanceEntityOptionsSchema>
export interface EnhanceEntityResponse {
version: number
hits: number
kgversion: string
request_ctx: RequestCtx
data: EnhanceEntityResponseDatum[]
errors: any[]
}
export interface RequestCtx {
query: Query
query_ctx: QueryCtx
}
export interface Query {
type: string
name: string[]
}
export interface QueryCtx {
search: string
}
export interface EnhanceEntityResponseDatum {
score: number
esscore: number
entity: Entity
errors: any[]
}
export interface Entity {
name: string
type: EntityType
id: string
summary?: string
description?: string
homepageUri?: string
twitterUri?: string
linkedInUri?: string
githubUri?: string
crunchbaseUri?: string
googlePlusUri?: string
facebookUri?: string
angellistUri?: string
wikipediaUri?: string
diffbotUri?: string
origin?: string
origins?: string[]
allUris?: string[]
// extra metadata
nbOrigins?: number
nbIncomingEdges?: number
nbFollowers?: number
educations?: Education[]
nationalities?: Nationality[]
allNames?: string[]
skills?: Skill[]
children?: Children[]
height?: number
image?: string
images?: Image[]
allOriginHashes?: string[]
nameDetail?: NameDetail
parents?: Parent[]
gender?: Gender
importance?: number
wikipediaPageviewsLastQuarterGrowth?: number
wikipediaPageviewsLastYear?: number
wikipediaPageviewsLastYearGrowth?: number
wikipediaPageviews?: number
wikipediaPageviewsLastQuarter?: number
wikipediaPageviewsGrowth?: number
birthPlace?: BirthPlace
types?: string[]
unions?: Union[]
languages?: Language[]
employments?: Employment[]
birthDate?: DateTime
religion?: Religion
awards?: Award[]
netWorth?: NetWorth
allDescriptions?: string[]
locations?: Location[]
location?: Location
interests?: Interest[]
emailAddresses?: any
age?: number
crawlTimestamp?: number
}
export interface Education {
institution: Institution
isCurrent?: boolean
major?: Major
degree?: Degree
from?: DateTime
to?: DateTime
}
export interface Institution {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Major {}
export interface Degree {
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface DateTime {
str: string
precision: number
timestamp: number
}
export interface Nationality {
name: string
type: string
}
export interface Skill {
name: string
diffbotUri?: string
targetDiffbotId?: string
}
export interface Children {
summary: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
// NOTE(review): `Image` is already declared earlier in this namespace (with
// `diffbotUri`, `naturalWidth`, `naturalHeight`, `width`, `height`, ...).
// TypeScript merges same-named interfaces, so this declaration does not
// create a separate, smaller type; it only repeats `url` and `primary`.
// Confirm whether a distinct name was intended for entity images.
export interface Image {
url: string
primary?: boolean
}
export interface NameDetail {
firstName: string
lastName: string
middleName?: string[]
}
export interface Parent {
summary: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
image?: string
}
export interface Gender {
normalizedValue: string
}
export interface BirthPlace {
country: Country
isCurrent: boolean
address: string
city: City
subregion: Subregion
latitude: number
precision: number
surfaceForm: string
region: Region
longitude: number
}
export interface Country {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface City {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Subregion {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Region {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Union {
person: Person
from?: DateTime
to?: DateTime
type?: string
}
export interface Person {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Language {
str: string
normalizedValue: string
}
export interface Employment {
isCurrent?: boolean
employer?: Employer
from?: DateTime
categories?: EmploymentCategory[]
title?: string
to?: DateTime
location?: Location
}
export interface Employer {
summary?: string
image?: string
types?: string[]
name: string
diffbotUri?: string
targetDiffbotId?: string
type: string
}
export interface EmploymentCategory {
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Location {
country?: Country
isCurrent: boolean
address: string
city: City
street: string
metroArea: MetroArea
subregion: Subregion
latitude: number
precision: number
postalCode: string
region?: Region
longitude: number
}
export interface MetroArea {
summary: string
image: string
types: string[]
name: string
diffbotUri: string
targetDiffbotId: string
type: string
}
export interface Religion {
str: string
}
export interface Award {
title: string
date?: DateTime
}
export interface NetWorth {
currency: string
value: number
}
export interface Interest {
name: string
type: string
}
/**
 * Passes `entity` through `omit` with a fixed list of keys:
 * `allOriginHashes`, `locations`, `images`, `nationalities`, `awards` and
 * `interests`.
 *
 * Used by `DiffbotClient.enhanceEntity` on every entity it returns.
 * NOTE(review): presumably `omit` returns a copy without the listed keys and
 * leaves `entity` untouched — confirm against `omit` in `../utils.js`.
 */
export function pruneEntity(entity: diffbot.Entity) {
return omit(
entity,
'allOriginHashes',
'locations',
'images',
'nationalities',
'awards',
'interests'
)
}
}
/**
 * Diffbot provides web page classification and scraping. It also provides
 * access to a knowledge graph with the ability to perform person and company
 * data enrichment.
 *
 * Two `ky` instances are kept: one for the extraction API and one for the
 * Knowledge Graph API, since they live under different base URLs.
 *
 * @see https://docs.diffbot.com
 */
export class DiffbotClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly kyKnowledgeGraph: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string
  protected readonly apiKnowledgeGraphBaseUrl: string

  /**
   * @param opts.apiKey - API token; defaults to the `DIFFBOT_API_KEY` env var. Required.
   * @param opts.apiBaseUrl - Base URL of the extraction API.
   * @param opts.apiKnowledgeGraphBaseUrl - Base URL of the Knowledge Graph API.
   * @param opts.timeoutMs - Request timeout in milliseconds for both APIs (default 30s).
   * @param opts.throttle - When `true` (default), requests go through `diffbot.throttle`.
   * @param opts.ky - `ky` instance to extend (defaults to the shared one).
   */
  constructor({
    apiKey = getEnv('DIFFBOT_API_KEY'),
    apiBaseUrl = diffbot.API_BASE_URL,
    apiKnowledgeGraphBaseUrl = diffbot.KNOWLEDGE_GRAPH_API_BASE_URL,
    timeoutMs = 30_000,
    throttle = true,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    apiKnowledgeGraphBaseUrl?: string
    timeoutMs?: number
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      `DiffbotClient missing required "apiKey" (defaults to "DIFFBOT_API_KEY")`
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl
    this.apiKnowledgeGraphBaseUrl = apiKnowledgeGraphBaseUrl

    // Both API clients derive from the same (optionally throttled) instance.
    const throttledKy = throttle ? throttleKy(ky, diffbot.throttle) : ky

    this.ky = throttledKy.extend({
      prefixUrl: apiBaseUrl,
      timeout: timeoutMs
    })

    this.kyKnowledgeGraph = throttledKy.extend({
      prefixUrl: apiKnowledgeGraphBaseUrl,
      timeout: timeoutMs
    })
  }

  /** Runs the `v3/analyze` endpoint against the given URL. */
  @aiFunction({
    name: 'diffbot_analyze_url',
    description:
      'Scrapes and extracts structured data from a web page. Also classifies the web page as one of several types (article, product, discussion, job, image, video, list, event, or other).',
    inputSchema: z.object({
      url: z.string().url().describe('The URL to process.')
    })
  })
  async analyzeUrl(options: diffbot.ExtractAnalyzeOptions) {
    return this._extract<diffbot.ExtractAnalyzeResponse>('v3/analyze', options)
  }

  /** Runs the `v3/article` endpoint against the given URL. */
  @aiFunction({
    name: 'diffbot_extract_article_from_url',
    description:
      'Scrapes and extracts clean article text from news articles, blog posts, and other text-heavy web pages.',
    inputSchema: z.object({
      url: z.string().url().describe('The URL to process.')
    })
  })
  async extractArticleFromUrl(options: diffbot.ExtractArticleOptions) {
    return this._extract<diffbot.ExtractArticleResponse>('v3/article', options)
  }

  /**
   * Calls `kg/v3/enhance` and returns the matched entities, each passed
   * through `diffbot.pruneEntity`.
   */
  @aiFunction({
    name: 'diffbot_enhance_entity',
    description:
      'Resolves and enriches a partial person or organization entity.',
    inputSchema: diffbot.EnhanceEntityOptionsSchema.omit({
      refresh: true,
      search: true,
      customId: true,
      threshold: true
    })
  })
  async enhanceEntity(opts: diffbot.EnhanceEntityOptions) {
    const res = await this.kyKnowledgeGraph
      .get('kg/v3/enhance', {
        searchParams: sanitizeSearchParams({
          ...opts,
          token: this.apiKey
        })
      })
      .json<diffbot.EnhanceEntityResponse>()

    return res.data.map((datum) => diffbot.pruneEntity(datum.entity))
  }

  /**
   * Queries the Knowledge Graph via `kg/v3/dql`.
   *
   * Options go through `sanitizeSearchParams`, like `enhanceEntity` and
   * `_extract`, so optional fields explicitly set to `undefined` are not
   * handed to the query string as-is.
   */
  async searchKnowledgeGraph(options: diffbot.KnowledgeGraphSearchOptions) {
    return this.kyKnowledgeGraph
      .get('kg/v3/dql', {
        searchParams: sanitizeSearchParams({
          ...options,
          token: this.apiKey
        })
      })
      .json<diffbot.KnowledgeGraphResponse>()
  }

  /**
   * Calls `kg/v3/enhance` and returns the raw Knowledge Graph response.
   *
   * Options go through `sanitizeSearchParams`, like `enhanceEntity` and
   * `_extract`, so optional fields explicitly set to `undefined` are not
   * handed to the query string as-is.
   */
  async enhanceKnowledgeGraph(options: diffbot.KnowledgeGraphEnhanceOptions) {
    return this.kyKnowledgeGraph
      .get('kg/v3/enhance', {
        searchParams: sanitizeSearchParams({
          ...options,
          token: this.apiKey
        })
      })
      .json<diffbot.KnowledgeGraphResponse>()
  }

  /**
   * Shared implementation for the extraction endpoints.
   *
   * `customJs` and `customHeaders` are sent as HTTP headers; every other
   * option is sent as a query parameter together with the API token.
   */
  protected async _extract<
    T extends diffbot.ExtractResponse = diffbot.ExtractResponse
  >(endpoint: string, options: diffbot.ExtractOptions): Promise<T> {
    const { customJs, customHeaders, ...rest } = options
    const searchParams = sanitizeSearchParams({
      ...rest,
      token: this.apiKey
    })

    // Only include the custom-JS header when a script was actually provided.
    const headers = {
      ...Object.fromEntries(
        [['X-Forward-X-Evaluate', customJs]].filter(([, value]) => value)
      ),
      ...customHeaders
    }

    // console.log(`DiffbotClient._extract: ${endpoint}`, searchParams)

    return this.ky
      .get(endpoint, {
        searchParams,
        headers,
        retry: 1
      })
      .json<T>()
  }
}

Wyświetl plik

@ -0,0 +1,263 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, pruneUndefined } from '../utils.js'
export namespace exa {
export const TextContentsOptionsSchema = z.object({
maxCharacters: z
.number()
.optional()
.describe('The maximum number of characters to return.'),
includeHtmlTags: z
.boolean()
.optional()
.describe('If true, includes HTML tags in the returned text.')
})
export type TextContentsOptions = z.infer<typeof TextContentsOptionsSchema>
export const HighlightsContentsOptionsSchema = z.object({
query: z
.string()
.optional()
.describe('The query string to use for highlights search.'),
numSentences: z
.number()
.optional()
.describe('The number of sentences to return for each highlight.'),
highlightsPerUrl: z
.number()
.optional()
.describe('The number of highlights to return for each URL.')
})
export type HighlightsContentsOptions = z.infer<
typeof HighlightsContentsOptionsSchema
>
/**
 * Selects which page contents to request. `text` and `highlights` each accept
 * either the literal `true` or an options object for that content kind; both
 * are optional.
 */
export const ContentsOptionsSchema = z.object({
text: z.union([TextContentsOptionsSchema, z.literal(true)]).optional(),
highlights: z
.union([HighlightsContentsOptionsSchema, z.literal(true)])
.optional()
})
/** Options type inferred from `ContentsOptionsSchema`. */
export type ContentsOptions = z.infer<typeof ContentsOptionsSchema>
export const BaseSearchOptionsSchema = z.object({
numResults: z
.number()
.optional()
.describe('Number of search results to return.'),
includeDomains: z
.array(z.string())
.optional()
.describe('List of domains to include in the search.'),
excludeDomains: z
.array(z.string())
.optional()
.describe('List of domains to exclude from the search.'),
startCrawlDate: z
.string()
.optional()
.describe(
'Start date for results based on crawl date (ISO 8601 format).'
),
endCrawlDate: z
.string()
.optional()
.describe('End date for results based on crawl date (ISO 8601 format).'),
startPublishedDate: z
.string()
.optional()
.describe(
'Start date for results based on published date (ISO 8601 format).'
),
endPublishedDate: z
.string()
.optional()
.describe(
'End date for results based on published date (ISO 8601 format).'
),
category: z
.string()
.optional()
.describe(
'A data category to focus on, with higher comprehensivity and data cleanliness. Currently, the only category is company.'
),
contents: ContentsOptionsSchema.optional().describe(
'Whether to include the contents of the search results.'
)
})
export type BaseSearchOptions = z.infer<typeof BaseSearchOptionsSchema>
/**
 * Input schema for `ExaClient.search`: the base search options plus a
 * required `query` and the optional `useAutoprompt` and `type` settings.
 */
export const RegularSearchOptionsSchema = BaseSearchOptionsSchema.extend({
query: z.string().describe('search query'),
useAutoprompt: z.boolean().optional(),
type: z.enum(['keyword', 'neural', 'magic']).optional()
})
/** Options type inferred from `RegularSearchOptionsSchema`. */
export type RegularSearchOptions = z.infer<typeof RegularSearchOptionsSchema>
export const FindSimilarOptionsSchema = BaseSearchOptionsSchema.extend({
url: z
.string()
.describe('The url for which you would like to find similar links'),
excludeSourceDomain: z
.boolean()
.optional()
.describe('If true, excludes links from the base domain of the input.')
})
export type FindSimilarOptions = z.infer<typeof FindSimilarOptionsSchema>
export const GetContentsOptionsSchema = ContentsOptionsSchema.extend({
ids: z
.array(z.string())
.nonempty()
.describe('Exa IDs of the documents to retrieve.')
})
export type GetContentsOptions = z.infer<typeof GetContentsOptionsSchema>
/**
* Represents a search result object.
*/
export type SearchResult = {
/** The title of the search result. */
title: string | null
/** The URL of the search result. */
url: string
/** The estimated creation date of the content (ISO 8601 format). */
publishedDate?: string
/** The author of the content, if available. */
author?: string
/** Similarity score between the query/url and the result. */
score?: number
/** The temporary Exa ID for the document. */
id: string
/** Text from page */
text?: string
/** The highlights as an array of strings. */
highlights?: string[]
/** The corresponding scores as an array of floats, 0 to 1 */
highlightScores?: number[]
}
/**
* Represents a search response object.
*/
export type SearchResponse = {
/** The list of search results. */
results: SearchResult[]
/** The autoprompt string, if applicable. */
autopromptString?: string
/** Internal ID of this request. */
requestId?: string
}
}
/**
 * Client for Exa, a web search API tailored for LLMs.
 *
 * @see https://docs.exa.ai
 */
export class ExaClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - API key; defaults to the `EXA_API_KEY` env var. Required.
   * @param opts.apiBaseUrl - Base URL; defaults to `EXA_API_BASE_URL` or `https://api.exa.ai`.
   * @param opts.ky - `ky` instance to extend (defaults to the shared one).
   */
  constructor({
    apiKey = getEnv('EXA_API_KEY'),
    apiBaseUrl = getEnv('EXA_API_BASE_URL') ?? 'https://api.exa.ai',
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'ExaClient missing required "apiKey" (defaults to "EXA_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    // Every request carries the API key in the `x-api-key` header.
    const headers = { 'x-api-key': apiKey }
    this.ky = ky.extend({ prefixUrl: this.apiBaseUrl, headers })
  }

  /**
   * Runs an Exa search. A bare string is treated as the `query`.
   */
  @aiFunction({
    name: 'exa_search',
    description: 'Search the web for the given query.',
    inputSchema: exa.RegularSearchOptionsSchema
  })
  async search(queryOrOpts: string | exa.RegularSearchOptions) {
    if (typeof queryOrOpts === 'string') {
      return this.ky
        .post('search', { json: { query: queryOrOpts } })
        .json<exa.SearchResponse>()
    }

    return this.ky.post('search', { json: queryOrOpts }).json<exa.SearchResponse>()
  }

  /**
   * Finds links similar to `opts.url`.
   *
   * When `excludeSourceDomain` is set, the hostname of `opts.url` is appended
   * to `excludeDomains`. The flag itself is not sent to the API.
   */
  @aiFunction({
    name: 'exa_find_similar',
    description: 'Find similar links to the provided URL.',
    inputSchema: exa.FindSimilarOptionsSchema
  })
  async findSimilar(opts: exa.FindSimilarOptions) {
    const { excludeSourceDomain, ...searchOpts } = opts

    const excludeDomains = [...(opts.excludeDomains ?? [])]
    if (excludeSourceDomain) {
      excludeDomains.push(new URL(opts.url).hostname)
    }

    // An empty exclusion list is omitted from the request entirely.
    const json = pruneUndefined({
      ...searchOpts,
      excludeDomains: excludeDomains.length > 0 ? excludeDomains : undefined
    })

    return this.ky.post('findSimilar', { json }).json<exa.SearchResponse>()
  }

  /**
   * Fetches the contents of the documents with the given Exa IDs.
   */
  @aiFunction({
    name: 'exa_get_contents',
    description:
      'Retrieve contents of documents based on a list of Exa document IDs.',
    inputSchema: exa.GetContentsOptionsSchema
  })
  async getContents({ ids, ...opts }: exa.GetContentsOptions) {
    // Tolerate a single ID passed where an array is expected.
    const documentIDs = Array.isArray(ids) ? ids : [ids]
    assert(documentIDs.length, 'Must provide at least one document ID')

    const json = {
      ...opts,
      ids: documentIDs
    }

    return this.ky.post('contents', { json }).json<exa.SearchResponse>()
  }
}

Wyświetl plik

@ -0,0 +1,252 @@
import defaultKy, { type KyInstance } from 'ky'
import z from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, delay, getEnv } from '../utils.js'
import { zodToJsonSchema } from '../zod-to-json-schema.js'
/** Request and response types used by `FirecrawlClient`. */
export namespace firecrawl {
/**
* Generic parameter interface.
*
* `extractorOptions.extractionSchema` may be a zod schema or any other value;
* `FirecrawlClient.scrapeUrl` converts zod schemas with `zodToJsonSchema`
* before sending the request.
*/
export interface Params {
extractorOptions?: {
extractionSchema: z.ZodSchema | any
mode?: 'llm-extraction'
extractionPrompt?: string
}
}
/**
* Response interface for scraping operations.
*/
export interface ScrapeResponse {
success: boolean
data?: Data
error?: string
}
/**
* Scraped page payload. `FirecrawlClient.scrapeUrl` removes `html` and
* `content` from it whenever `markdown` is present.
*/
export interface Data {
content?: string
markdown?: string
html?: string
metadata: Metadata
}
/**
* Page metadata attached to scraped `Data`.
*/
export interface Metadata {
title: string
description: string
keywords?: string
robots?: string
ogTitle?: string
ogDescription?: string
ogUrl?: string
ogImage?: string
ogLocaleAlternate?: any[]
ogSiteName?: string
sourceURL?: string
modifiedTime?: string
publishedTime?: string
}
/**
* Response interface for searching operations.
*/
export interface SearchResponse {
success: boolean
data?: any
error?: string
}
/**
* Response interface for crawling operations.
*
* `FirecrawlClient.crawlUrl` asserts that `jobId` is present.
*/
export interface CrawlResponse {
success: boolean
jobId?: string
data?: any
error?: string
}
/**
* Response interface for job status checks.
*
* `FirecrawlClient.waitForCrawlJob` treats `completed` as done, keeps polling
* on `active`, `paused`, `pending` and `queued`, and throws on any other
* `status`.
*/
export interface JobStatusResponse {
success: boolean
status: string
jobId?: string
data?: any
error?: string
}
}
/**
 * Client for Firecrawl: turns websites into LLM-ready data by crawling and
 * converting pages into clean markdown or structured data.
 *
 * @see https://www.firecrawl.dev
 * @see https://github.com/mendableai/firecrawl
 */
export class FirecrawlClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - API key; defaults to the `FIRECRAWL_API_KEY` env var. Required.
   * @param opts.apiBaseUrl - Base URL; defaults to `FIRECRAWL_API_BASE_URL` or `https://api.firecrawl.dev`.
   * @param opts.timeoutMs - Default request timeout in milliseconds (60s).
   * @param opts.ky - `ky` instance to extend (defaults to the shared one).
   */
  constructor({
    apiKey = getEnv('FIRECRAWL_API_KEY'),
    apiBaseUrl = getEnv('FIRECRAWL_API_BASE_URL') ??
      'https://api.firecrawl.dev',
    timeoutMs = 60_000,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    timeoutMs?: number
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'FirecrawlClient missing required "apiKey" (defaults to "FIRECRAWL_API_KEY")'
    )
    assert(
      apiBaseUrl,
      'FirecrawlClient missing required "apiBaseUrl" (defaults to "FIRECRAWL_API_BASE_URL")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    // All requests authenticate with a bearer token.
    const headers = { Authorization: `Bearer ${this.apiKey}` }
    this.ky = ky.extend({
      prefixUrl: apiBaseUrl,
      timeout: timeoutMs,
      headers
    })
  }

  /**
   * Scrapes a single URL via `v0/scrape`.
   *
   * When an extraction schema is supplied, zod schemas are converted with
   * `zodToJsonSchema` and `mode` defaults to `'llm-extraction'`. If the
   * response contains markdown, the `html` and `content` fields are removed
   * from it before returning.
   */
  @aiFunction({
    name: 'firecrawl_scrape_url',
    description: 'Scrape the contents of a URL.',
    inputSchema: z.object({
      url: z.string().url().describe('The URL to scrape.')
    })
  })
  async scrapeUrl(
    opts: {
      url: string
    } & firecrawl.Params
  ) {
    const json = { ...opts }

    const extractorOptions = opts?.extractorOptions
    if (extractorOptions?.extractionSchema) {
      const { extractionSchema } = extractorOptions
      const schema =
        extractionSchema instanceof z.ZodSchema
          ? zodToJsonSchema(extractionSchema)
          : extractionSchema

      json.extractorOptions = {
        mode: 'llm-extraction',
        ...extractorOptions,
        extractionSchema: schema
      }
    }

    const res = await this.ky
      .post('v0/scrape', { json })
      .json<firecrawl.ScrapeResponse>()

    const data = res.data
    if (res.success && data && data.markdown) {
      // Markdown is enough for consumers; drop the heavier duplicates.
      delete data.html
      delete data.content
    }

    return res
  }

  /** Runs a search via `v0/search`, sending `opts` as the JSON body. */
  async search(
    opts: {
      query: string
    } & firecrawl.Params
  ) {
    const request = this.ky.post('v0/search', { json: opts })
    return request.json<firecrawl.SearchResponse>()
  }

  /**
   * Starts a crawl via `v0/crawl`.
   *
   * `timeoutMs` is used both as the HTTP timeout of the start request and as
   * the polling timeout when `waitUntilDone` is set. Without `waitUntilDone`
   * the start response is returned as-is.
   */
  async crawlUrl({
    waitUntilDone = true,
    timeoutMs = 30_000,
    idempotencyKey,
    ...params
  }: {
    url: string
    waitUntilDone?: boolean
    timeoutMs?: number
    idempotencyKey?: string
  } & firecrawl.Params) {
    const headers = idempotencyKey
      ? { 'x-idempotency-key': idempotencyKey }
      : undefined

    const res = await this.ky
      .post('v0/crawl', { json: params, timeout: timeoutMs, headers })
      .json<firecrawl.CrawlResponse>()
    assert(res.jobId)

    if (!waitUntilDone) {
      return res
    }

    return this.waitForCrawlJob({ jobId: res.jobId, timeoutMs })
  }

  /** Fetches the current status of a crawl job via `v0/crawl/status/<jobId>`. */
  async checkCrawlStatus(jobId: string) {
    assert(jobId)

    const request = this.ky.get(`v0/crawl/status/${jobId}`)
    return request.json<firecrawl.JobStatusResponse>()
  }

  /**
   * Polls a crawl job once per second until it completes.
   *
   * @throws Error when the job reports a status other than `completed`,
   * `active`, `paused`, `pending` or `queued`, or when more than `timeoutMs`
   * has elapsed since polling started.
   */
  async waitForCrawlJob({
    jobId,
    timeoutMs = 60_000
  }: {
    jobId: string
    timeoutMs?: number
  }) {
    assert(jobId)

    const inFlightStatuses = ['active', 'paused', 'pending', 'queued']
    const startedAt = Date.now()

    for (;;) {
      const res = await this.checkCrawlStatus(jobId)
      const { status } = res

      if (status === 'completed') {
        return res
      }

      if (!inFlightStatuses.includes(status)) {
        throw new Error(
          `Crawl job "${jobId}" failed or was stopped. Status: ${res.status}`
        )
      }

      const elapsedMs = Date.now() - startedAt
      if (elapsedMs > timeoutMs) {
        throw new Error(
          `Timeout waiting for crawl job "${jobId}" to complete: ${res.status}`
        )
      }

      await delay(1000)
    }
  }
}

Wyświetl plik

@ -0,0 +1,23 @@
export * from './bing-client.js'
export * from './clearbit-client.js'
export * from './dexa-client.js'
export * from './diffbot-client.js'
export * from './exa-client.js'
export * from './firecrawl-client.js'
export * from './midjourney-client.js'
export * from './novu-client.js'
export * from './people-data-labs-client.js'
export * from './perigon-client.js'
export * from './polygon-client.js'
export * from './predict-leads-client.js'
export * from './proxycurl-client.js'
export * from './scraper-client.js'
export * from './searxng-client.js'
export * from './serpapi-client.js'
export * from './serper-client.js'
export * from './slack-client.js'
export * from './tavily-client.js'
export * from './twilio-client.js'
export * from './weather-client.js'
export * from './wikipedia-client.js'
export * from './wolfram-alpha-client.js'

Wyświetl plik

@ -0,0 +1,195 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { TimeoutError } from '../errors.js'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, delay, getEnv, pruneNullOrUndefined } from '../utils.js'
// TODO: add additional methods for upscaling, variations, etc.
/** Constants and types used by `MidjourneyClient`. */
export namespace midjourney {
/** Default `apiBaseUrl` of `MidjourneyClient`. */
export const API_BASE_URL = 'https://cl.imagineapi.dev'
/** Job lifecycle states; the client treats `completed` and `failed` as final. */
export type JobStatus = 'pending' | 'in-progress' | 'completed' | 'failed'
/** Response envelope: the job is returned under `data`. */
export interface ImagineResponse {
data: Job
}
/** An image generation job as returned by the API. */
export interface Job {
id: string
prompt: string
status: JobStatus
user_created: string
date_created: string
results?: string
progress?: string
url?: string
error?: string
upscaled_urls?: string[]
ref?: string
upscaled?: string[]
}
/** Options controlling whether and how the client waits for a job to finish. */
export interface JobOptions {
// Whether to wait for the job to reach a final state (client methods default this to `true`).
wait?: boolean
// Maximum time to wait, in milliseconds (`waitForJobById` defaults to 5 minutes).
timeoutMs?: number
// Delay between status polls, in milliseconds (`waitForJobById` defaults to 1000).
intervalMs?: number
}
}
/**
 * Unofficial Midjourney API client for generative images.
 *
 * @see https://www.imagineapi.dev
 */
export class MidjourneyClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - API key; defaults to the `MIDJOURNEY_IMAGINE_API_KEY` env var. Required.
   * @param opts.apiBaseUrl - Base URL; defaults to `midjourney.API_BASE_URL`.
   * @param opts.ky - `ky` instance to extend (defaults to the shared one).
   */
  constructor({
    apiKey = getEnv('MIDJOURNEY_IMAGINE_API_KEY'),
    apiBaseUrl = midjourney.API_BASE_URL,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'MidjourneyClient missing required "apiKey" (defaults to "MIDJOURNEY_IMAGINE_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    this.ky = ky.extend({
      prefixUrl: apiBaseUrl,
      headers: {
        Authorization: `Bearer ${this.apiKey}`
      }
    })
  }

  /**
   * Creates an image job for the given prompt.
   *
   * By default (`wait: true`) the returned promise resolves once the job is
   * `completed` or `failed`; with `wait: false` the freshly created job is
   * returned immediately. `wait`, `timeoutMs` and `intervalMs` are client-side
   * options and are not sent to the API.
   */
  @aiFunction({
    name: 'midjourney_create_images',
    description:
      'Creates 4 images from a prompt using the Midjourney API. Useful for generating images on the fly.',
    inputSchema: z.object({
      prompt: z
        .string()
        .describe(
          'Simple, short, comma-separated list of phrases which describe the image you want to generate'
        )
    })
  })
  async imagine(
    promptOrOptions:
      | string
      | ({
          prompt: string
        } & midjourney.JobOptions)
  ): Promise<midjourney.Job> {
    const {
      wait = true,
      timeoutMs,
      intervalMs,
      ...options
    } = typeof promptOrOptions === 'string'
      ? ({ prompt: promptOrOptions } as {
          prompt: string
        } & midjourney.JobOptions)
      : promptOrOptions

    const res = await this.ky
      .post('items/images', {
        json: { ...options }
      })
      .json<midjourney.ImagineResponse>()

    const job = pruneNullOrUndefined(res.data)
    if (!wait) {
      return job
    }

    if (job.status === 'completed' || job.status === 'failed') {
      return job
    }

    return this.waitForJobById(job.id, {
      timeoutMs,
      intervalMs
    })
  }

  /**
   * Fetches a job by ID.
   *
   * By default (`wait: true`) a job that is not yet final is handed to
   * `waitForJobById`; pass `wait: false` to get the current snapshot only.
   */
  async getJobById(
    jobIdOrOptions:
      | string
      | ({
          jobId: string
        } & midjourney.JobOptions)
  ): Promise<midjourney.Job> {
    const {
      jobId,
      wait = true,
      timeoutMs,
      intervalMs
    } = typeof jobIdOrOptions === 'string'
      ? ({ jobId: jobIdOrOptions } as {
          jobId: string
        } & midjourney.JobOptions)
      : jobIdOrOptions

    const res = await this.ky
      .get(`items/images/${jobId}`)
      .json<midjourney.ImagineResponse>()

    const job = pruneNullOrUndefined(res.data)
    if (!wait) {
      return job
    }

    if (job.status === 'completed' || job.status === 'failed') {
      return job
    }

    return this.waitForJobById(job.id, {
      timeoutMs,
      intervalMs
    })
  }

  /**
   * Polls a job every `intervalMs` until it is `completed` or `failed`.
   *
   * @throws TimeoutError when `timeoutMs` elapses before the job is final.
   */
  async waitForJobById(
    jobId: string,
    {
      timeoutMs = 5 * 60 * 1000, // 5 minutes
      intervalMs = 1000
    }: Omit<midjourney.JobOptions, 'wait'> = {}
  ): Promise<midjourney.Job> {
    const startTimeMs = Date.now()

    function checkForTimeout() {
      const elapsedTimeMs = Date.now() - startTimeMs
      if (elapsedTimeMs >= timeoutMs) {
        throw new TimeoutError(
          `MidjourneyClient timeout waiting for job "${jobId}"`
        )
      }
    }

    do {
      checkForTimeout()

      // Fetch a single snapshot only. `getJobById` waits by default, which
      // would start a nested `waitForJobById` with a fresh start time on every
      // poll of a pending job, bypassing both `intervalMs` and `timeoutMs`.
      const job = await this.getJobById({ jobId, wait: false })
      if (job.status === 'completed' || job.status === 'failed') {
        return job
      }

      checkForTimeout()
      await delay(intervalMs)
    } while (true)
  }
}

Wyświetl plik

@ -0,0 +1,161 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv } from '../utils.js'
export namespace novu {
  /** Base URL used when a client is constructed without an explicit `apiBaseUrl`. */
  export const API_BASE_URL = 'https://api.novu.co/v1'

  /**
   * A notification recipient as understood by Novu.
   */
  export type Subscriber = {
    /**
     * Unique identifier for the subscriber. Any value meaningful to your
     * application works, e.g. a database user ID or a unique email address.
     */
    subscriberId: string

    /** Subscriber's email address. */
    email?: string

    /** Subscriber's first name. */
    firstName?: string

    /** Subscriber's last name. */
    lastName?: string

    /** Subscriber's phone number. */
    phone?: string
  }

  /**
   * Shape of the Novu API response to an event trigger.
   *
   * @see {@link https://docs.novu.co/api/client-libraries#trigger-event}
   */
  export type TriggerEventResponse = {
    /** Details about the triggered event. */
    data: {
      /** Whether the trigger was acknowledged. */
      acknowledged?: boolean

      /** Status of the trigger. */
      status?: string

      /** Transaction id of the trigger. */
      transactionId?: string

      /** Populated with the error message(s) when the trigger fails. */
      error?: any[]
    }
  }

  /**
   * Input for triggering a Novu event.
   */
  export type TriggerOptions = {
    /**
     * Name of the event to trigger. Should match the name of an existing
     * notification template in Novu.
     */
    name: string

    /**
     * Payload for the event, used to fill in any handlebars placeholders in
     * the notification template.
     */
    payload: Record<string, unknown>

    /**
     * Recipients of the notification. Each needs at least a unique
     * `subscriberId` and, if not already known to Novu, an `email` address or
     * `phone` number depending on the notification template being used.
     */
    to: Subscriber[]
  }
}
/**
 * Client for the Novu API, a router for sending notifications across
 * channels such as Email, SMS, Chat, In-App, and Push.
 *
 * @see https://novu.co
 */
export class NovuClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - Novu API key; defaults to the `NOVU_API_KEY`
   * environment variable.
   * @param opts.apiBaseUrl - Base URL for all requests; defaults to
   * `novu.API_BASE_URL`.
   * @param opts.ky - `ky` instance to extend; defaults to the global instance.
   * @throws If no API key is available.
   */
  constructor({
    apiKey = getEnv('NOVU_API_KEY'),
    apiBaseUrl = novu.API_BASE_URL,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'NovuClient missing required "apiKey" (defaults to "NOVU_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    // Every request carries the API key in the `Authorization` header.
    const headers = { Authorization: `ApiKey ${this.apiKey}` }
    this.ky = ky.extend({ prefixUrl: this.apiBaseUrl, headers })
  }

  /**
   * Triggers a Novu event by posting `options` to `events/trigger`.
   *
   * @param options - Event name, template payload, and recipients.
   * @returns The parsed trigger response.
   * @see https://docs.novu.co/api-reference/events/trigger-event
   */
  @aiFunction({
    name: 'novu_trigger_event',
    description:
      'Sends a notification to a person given their novu `subscriberId` and an `email` or `phone` number. Useful for sending emails or SMS text messages to people.',
    inputSchema: z.object({
      name: z.string(),
      // TODO: make this more specific (the original note was left unfinished)
      payload: z.record(z.any()),
      to: z.array(
        z.object({
          subscriberId: z.string(),
          email: z.string().optional(),
          firstName: z.string().optional(),
          lastName: z.string().optional(),
          phone: z.string().optional()
        })
      )
    })
  })
  async triggerEvent(options: novu.TriggerOptions) {
    const request = this.ky.post('events/trigger', { json: options })
    return request.json<novu.TriggerEventResponse>()
  }
}

Wyświetl plik

@ -0,0 +1,546 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { assert, getEnv, sanitizeSearchParams, throttleKy } from '../utils.js'
/**
* TODO: I'm holding off on converting this client to an `AIFunctionsProvider`
* because it seems to be significantly more expensive than other data sources,
* and I'm not sure if it's worth the cost.
*/
export namespace peopledatalabs {
  /** Base URL used when a client is constructed without an explicit `apiBaseUrl`. */
  export const BASE_URL = 'https://api.peopledatalabs.com/v5/'

  /** Throttle allowing up to 10 requests per minute. */
  export const throttle10PerMin = pThrottle({
    limit: 10,
    interval: 60 * 1000,
    strict: true
  })

  /** Throttle allowing up to 100 requests per minute. */
  export const throttle100PerMin = pThrottle({
    limit: 100,
    interval: 60 * 1000,
    strict: true
  })

  /** Job title level values. */
  export const JobTitleLevels = [
    'cxo',
    'director',
    'entry',
    'manager',
    'owner',
    'partner',
    'senior',
    'training',
    'unpaid',
    'vp'
  ]

  /** Job title role values. */
  export const JobTitleRoles = [
    'customer_service',
    'design',
    'education',
    'engineering',
    'finance',
    'health',
    'human_resources',
    'legal',
    'marketing',
    'media',
    'operations',
    'public_relations',
    'real_estate',
    'sales',
    'trades'
  ]

  // TODO: tighten this type so that at least one of `pdl_id`, `name`,
  // `profile`, `ticker`, or `website` is required (only one is needed).
  /** Parameters accepted by the client's `companyProfile` method. */
  export interface CompanyLookupOptions {
    pdl_id?: string
    name?: string
    profile?: string
    ticker?: string
    website?: string
    location?: string[]
    locality?: string
    region?: string
    country?: string
    street_address?: string
    postal_code?: string
    data_include?: string
    pretty?: boolean
  }

  /** NAICS industry classification entry. */
  export interface Naics {
    naics_code: string
    sector: string
    sub_sector: string
    industry_group: string
    naics_industry: string | null
    national_industry: string | null
  }

  /** SIC industry classification entry. */
  export interface Sic {
    sic_code: string
    major_group: string
    industry_group: string
    industry_sector: string | null
  }

  /** Structured location attached to companies and schools. */
  export interface Location {
    name: string
    locality: string
    region: string
    metro: string
    country: string
    continent: string
    street_address: string
    address_line_2: string | null
    postal_code: string
    geo: string
  }

  /** Employee head-count keyed by country. */
  export interface EmployeeCountByCountry {
    [country: string]: number
  }

  /** Response type used by the client's `companyProfile` method. */
  export interface CompanyLookupResponse {
    status: number
    name: string
    display_name: string
    size: string
    employee_count: number
    id: string
    founded: number
    industry: string
    naics: Naics[]
    sic: Sic[]
    location: Location
    linkedin_id: string
    linkedin_url: string
    facebook_url: string
    twitter_url: string
    profiles: string[]
    website: string
    ticker: string
    gics_sector: string | null
    mic_exchange: string | null
    type: string
    summary: string
    tags: string[]
    headline: string
    alternative_names: string[]
    alternative_domains: string[]
    affiliated_profiles: string[]
    employee_count_by_country: EmployeeCountByCountry
    likelihood: number
  }

  /** Options for a company search: a result limit plus field/value terms. */
  export interface CompanySearchOptions {
    limit?: number
    query: {
      website?: string
      tags?: string
      industry?: string
      'location.country'?: string
      'location.metro'?: string
      summary?: string
      size?: string[]
      affiliated_profiles?: string
    }
  }

  /** Field names allowed in a company search query. */
  export type CompanySearchOptionsQueryKeys =
    keyof CompanySearchOptions['query']

  /** Response type used by the client's `companyLookup` method. */
  export interface CompanySearchResponse {
    status: number
    data: {
      name: string
      display_name: string
      size: string
      employee_count: number
      id: string
      founded: number
      industry: string
      naics: Naics[]
      sic: Sic[]
      location: Location
      linkedin_id: string
      linkedin_url: string
      facebook_url: string
      twitter_url: string
      profiles: string[]
      website: string
      ticker: string
      gics_sector: string | null
      mic_exchange: string | null
      type: string
      summary: string
      tags: string[]
      headline: string
      alternative_names: string[]
      alternative_domains: string[]
      affiliated_profiles: string[]
      employee_count_by_country: EmployeeCountByCountry
    }[]
    scroll_token: string
    total: number
  }

  /** Options for a person search: a result limit plus field/value terms. */
  export interface PersonSearchOptions {
    limit?: number
    query: {
      first_name?: string
      full_name?: string
      last_name?: string
      job_company_website?: string
      job_title_role?: string
      /**
       * The docs say this property should be an array of strings, but
       * sending an array results in a 404 error.
       * See: https://docs.peopledatalabs.com/docs/fields#job_title_levels
       */
      job_title_levels?: string
      job_company_name?: string
      job_company_location_country?: string
    }
  }

  /** Field names allowed in a person search query. */
  export type PersonSearchOptionsQueryKeys = keyof PersonSearchOptions['query']

  /** Person record; used as the response type of the client's `personSearch` method. */
  export interface SearchPersonApiResponse {
    id: string
    full_name: string
    first_name: string
    middle_initial: string | null
    middle_name: string | null
    last_initial: string
    last_name: string
    gender: string
    birth_year: number | null
    birth_date: string | null
    linkedin_url: string
    linkedin_username: string
    linkedin_id: string
    facebook_url: string | null
    facebook_username: string | null
    facebook_id: string | null
    twitter_url: string
    twitter_username: string
    github_url: string | null
    github_username: string | null
    work_email: string
    personal_emails: string[]
    recommended_personal_email: string | null
    mobile_phone: string | null
    industry: string | null
    job_title: string
    job_title_role: string | null
    job_title_sub_role: string | null
    job_title_levels: string[]
    job_onet_code: string
    job_onet_major_group: string
    job_onet_minor_group: string
    job_onet_broad_occupation: string
    job_onet_specific_occupation: string
    job_onet_specific_occupation_detail: string
    job_company_id: string
    job_company_name: string
    job_company_website: string
    job_company_size: string
    job_company_founded: number
    job_company_industry: string
    job_company_linkedin_url: string
    job_company_linkedin_id: string
    job_company_facebook_url: string
    job_company_twitter_url: string
    job_company_type: string
    job_company_ticker: string | null
    job_company_location_name: string
    job_company_location_locality: string
    job_company_location_metro: string
    job_company_location_region: string
    job_company_location_geo: string
    job_company_location_street_address: string
    job_company_location_address_line_2: string
    job_company_location_postal_code: string
    job_company_location_country: string
    job_company_location_continent: string
    job_last_updated: string
    job_start_date: string
    job_summary: string | null
    location_name: string | null
    location_locality: string | null
    location_metro: string | null
    location_region: string | null
    location_country: string | null
    location_continent: string | null
    location_street_address: string | null
    location_address_line_2: string | null
    location_postal_code: string | null
    location_geo: string | null
    location_last_updated: string | null
    linkedin_connections: number
    facebook_friends: string | null
    inferred_salary: string
    inferred_years_experience: number
    summary: string | null
    phone_numbers: string[]
    phones: string[]
    emails: Email[]
    interests: string[]
    skills: string[]
    location_names: string[]
    regions: string[]
    countries: string[]
    street_addresses: string[]
    experience: Experience[]
    education: Education[]
    profiles: Profile[]
    name_aliases: string[]
    possible_emails: PossibleEmail[]
    possible_profiles: PossibleProfile[]
    possible_phones: PossiblePhone[]
    possible_street_addresses: string[]
    possible_location_names: string[]
    possible_birth_dates: string[]
    job_history: JobHistory[]
    certifications: string[]
    languages: string[]
    first_seen: string
    num_sources: number
    num_records: number
    version_status: VersionStatus
  }

  /** Email address entry on a person record. */
  export interface Email {
    address: string
    type: string | null
    first_seen: string
    last_seen: string
    num_sources: number
  }

  /** Work experience entry on a person record. */
  export interface Experience {
    company: Company
    start_date: string | null
    end_date: string | null
    title: Title
    location_names: string[]
    is_primary: boolean
    summary: string | null
    num_sources: number
    first_seen: string
    last_seen: string
  }

  /** Company referenced from an experience entry. */
  export interface Company {
    name: string
    size: string
    id: string
    founded: number
    industry: string
    location: Location
    linkedin_url: string
    linkedin_id: string
    facebook_url: string | null
    twitter_url: string
    website: string
    ticker: string | null
    type: string
    raw: string[]
    fuzzy_match: boolean
  }

  /** Job title referenced from an experience entry. */
  export interface Title {
    name: string
    raw: string[]
    role: string | null
    sub_role: string | null
    levels: string[]
  }

  /** Education entry on a person record. */
  export interface Education {
    school: School
    degrees: string[]
    start_date: string
    end_date: string
    majors: string[]
    minors: string[]
    gpa: string | null
    raw: string[]
    summary: string | null
  }

  /** School referenced from an education entry. */
  export interface School {
    name: string
    type: string
    id: string
    location: Location
    linkedin_url: string
    facebook_url: string
    twitter_url: string
    linkedin_id: string
    website: string
    domain: string
    raw: string[]
  }

  /** Social/network profile entry on a person record. */
  export interface Profile {
    network: string
    id: string | null
    url: string
    username: string
    num_sources: number
    first_seen: string
    last_seen: string
  }

  /** Entry of a person record's `possible_emails` list. */
  export interface PossibleEmail {
    address: string
    type: string | null
    first_seen: string
    last_seen: string
    num_sources: number
  }

  /** Entry of a person record's `possible_profiles` list. */
  export interface PossibleProfile {
    network: string
    id: string | null
    url: string
    username: string | null
    num_sources: number
    first_seen: string
    last_seen: string
  }

  /** Entry of a person record's `possible_phones` list. */
  export interface PossiblePhone {
    number: string
    first_seen: string
    last_seen: string
    num_sources: number
  }

  /** Version metadata attached to a person record. */
  export interface VersionStatus {
    status: string
    contains: string[]
    previous_version: string
    current_version: string
  }

  /** Entry of a person record's `job_history` list. */
  export interface JobHistory {
    company_id: string
    company_name: string
    title: string
    first_seen: string
    last_seen: string
    num_sources: number
  }
}
/**
 * People & Company Data
 *
 * Thin client over the People Data Labs REST API. Requests are authenticated
 * with an `x-api-key` header and, unless disabled, throttled to 10 requests
 * per minute.
 *
 * @see https://www.peopledatalabs.com
 */
export class PeopleDataLabsClient {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - API key; defaults to the `PEOPLE_DATA_LABS_API_KEY`
   * environment variable.
   * @param opts.apiBaseUrl - Base URL for all requests; defaults to
   * `peopledatalabs.BASE_URL`.
   * @param opts.timeoutMs - Per-request timeout in milliseconds (default 30000).
   * @param opts.throttle - Whether to apply the 10-requests-per-minute
   * throttle (default `true`).
   * @param opts.ky - `ky` instance to extend; defaults to the global instance.
   * @throws If no API key is available.
   */
  constructor({
    apiKey = getEnv('PEOPLE_DATA_LABS_API_KEY'),
    apiBaseUrl = peopledatalabs.BASE_URL,
    timeoutMs = 30_000,
    throttle = true,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    timeoutMs?: number
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'PeopleDataLabsClient missing required "apiKey" (defaults to "PEOPLE_DATA_LABS_API_KEY")'
    )

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    const throttledKy = throttle
      ? throttleKy(ky, peopledatalabs.throttle10PerMin)
      : ky

    this.ky = throttledKy.extend({
      prefixUrl: apiBaseUrl,
      timeout: timeoutMs,
      headers: {
        'x-api-key': `${this.apiKey}`
      }
    })
  }

  /**
   * Turns a `{ field: value }` query object into a list of `term` clauses.
   *
   * Fields whose value is `undefined` are skipped: `JSON.stringify` would
   * otherwise drop the value and leave an empty `{ term: {} }` clause in the
   * serialized query.
   */
  protected static buildTermClauses(
    terms: object
  ): Array<{ term: Record<string, unknown> }> {
    return Object.entries(terms)
      .filter(([, value]) => value !== undefined)
      .map(([field, value]) => ({ term: { [field]: value } }))
  }

  /**
   * Searches companies via `company/search`; every query field must match.
   *
   * @param options - Query terms plus an optional `limit` (defaults to 1).
   * @returns The parsed search response.
   */
  async companyLookup(options: peopledatalabs.CompanySearchOptions) {
    const termsQuery = PeopleDataLabsClient.buildTermClauses(options.query)

    return this.ky
      .get('company/search', {
        searchParams: {
          size: options.limit || 1,
          query: JSON.stringify({
            bool: {
              must: termsQuery
            }
          })
        }
      })
      .json<peopledatalabs.CompanySearchResponse>()
  }

  /**
   * Fetches a single company via `company/enrich`.
   *
   * @param options - Lookup parameters, sent as search params after
   * `sanitizeSearchParams`.
   * @returns The parsed company record.
   */
  async companyProfile(options: peopledatalabs.CompanyLookupOptions) {
    return this.ky
      .get('company/enrich', {
        searchParams: sanitizeSearchParams({ ...options })
      })
      .json<peopledatalabs.CompanyLookupResponse>()
  }

  /**
   * Searches people via `person/search`; every query field must match.
   *
   * NOTE(review): the declared response type describes a single person
   * record — confirm it matches the envelope this endpoint actually returns.
   *
   * @param options - Query terms plus an optional `limit` (defaults to 10).
   * @returns The parsed search response.
   */
  async personSearch(options: peopledatalabs.PersonSearchOptions) {
    const termsQuery = PeopleDataLabsClient.buildTermClauses(options.query)

    return this.ky
      .get('person/search', {
        searchParams: {
          size: options.limit || 10,
          query: JSON.stringify({
            bool: {
              must: termsQuery
            }
          })
        }
      })
      .json<peopledatalabs.SearchPersonApiResponse>()
  }
}

Wyświetl plik

@ -0,0 +1,794 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, sanitizeSearchParams, throttleKy } from '../utils.js'
// TODO: https://docs.goperigon.com/docs/searching-sources
// TODO: https://docs.goperigon.com/docs/journalist-data
// TODO: https://docs.goperigon.com/docs/topics
export namespace perigon {
  /** Default throttle: up to 2 requests per second. */
  export const throttle = pThrottle({
    limit: 2,
    interval: 1000,
    strict: true
  })

  /** Page size used by the client when the caller does not supply one. */
  export const DEFAULT_PAGE_SIZE = 10

  /** Upper bound the client clamps the requested page size to. */
  export const MAX_PAGE_SIZE = 100

  /** Labels an article can carry. */
  export const ArticleLabelSchema = z.union([
    z.literal('Opinion'),
    z.literal('Non-news'),
    z.literal('Paid News'),
    z.literal('Fact Check'),
    z.literal('Pop Culture'),
    z.literal('Roundup'),
    z.literal('Press Release')
  ])
  export type ArticleLabel = z.infer<typeof ArticleLabelSchema>

  /** Article categories; `none` selects uncategorized articles. */
  export const CategoriesSchema = z.union([
    z.literal('Politics'),
    z.literal('Tech'),
    z.literal('Sports'),
    z.literal('Business'),
    z.literal('Finance'),
    z.literal('Entertainment'),
    z.literal('Health'),
    z.literal('Weather'),
    z.literal('Lifestyle'),
    z.literal('Auto'),
    z.literal('Science'),
    z.literal('Travel'),
    z.literal('Environment'),
    z.literal('World'),
    z.literal('General'),
    z.literal('none')
  ])
  export type Categories = z.infer<typeof CategoriesSchema>

  /** Named groups of sources that a search can be restricted to. */
  export const SourceGroupSchema = z.union([
    z.literal('top10').describe('Top 10 most popular sources globally'),
    z.literal('top100').describe('Top 100 most popular sources globally'),
    z
      .literal('top500English')
      .describe('Top 500 most popular (English) sources globally'),
    z
      .literal('top25crypto')
      .describe(
        'Top 25 most popular sources covering cryptocurrency & blockchain developments'
      ),
    z
      .literal('top25finance')
      .describe(
        'Top 25 most popular sources covering financial news, movement in the markets & public equities'
      ),
    z
      .literal('top50tech')
      .describe('Top 50 sources covering new technology & businesses in tech'),
    z
      .literal('top100sports')
      .describe(
        'Top 100 most popular (English) sources covering sports of all types'
      ),
    z
      .literal('top100leftUS')
      .describe(
        'Top 100 most popular (US) sources with an average political bias rating of Left or Leans Left'
      ),
    z
      .literal('top100rightUS')
      .describe(
        'Top 100 most popular (US) sources with an average political bias rating of Right or Leans Right'
      ),
    z
      .literal('top100centerUS')
      .describe(
        'Top 100 most popular (US) sources with an average political bias rating of Center or Middle'
      )
  ])
  export type SourceGroup = z.infer<typeof SourceGroupSchema>

  /** Sort orders accepted by the article search. */
  export const SortBySchema = z.union([
    z.literal('date'),
    z.literal('relevance'),
    z.literal('addDate'),
    z.literal('pubDate'),
    z.literal('refreshDate')
  ])
  export type SortBy = z.infer<typeof SortBySchema>

  /**
   * Full set of article search parameters. The client's AI function exposes
   * only a picked subset of these fields.
   */
  export const ArticlesSearchOptionsSchema = z.object({
    q: z.string()
      .describe(`Search query. It may use boolean operators (AND, OR, NOT) and quotes for exact matching. Example search queries:
- election news
- "elon musk" AND tesla
- (upcoming release OR launch) AND apple
- (Google OR Amazon) AND NOT ("Jeff Bezos" OR Android)
- "climate change"
`),
    title: z
      .string()
      .optional()
      .describe(
        'Search query which applies only to article titles / headlines.'
      ),
    desc: z.string().optional(),
    content: z.string().optional(),
    url: z.string().optional(),
    from: z
      .string()
      .optional()
      .describe(
        'Filter to only return articles published after the specified date (ISO or "yyyy-mm-dd" format)'
      ),
    to: z
      .string()
      .optional()
      .describe(
        'Filter to only return articles published before the specified date (ISO or "yyyy-mm-dd" format)'
      ),
    addDateFrom: z.string().optional(),
    addDateTo: z.string().optional(),
    refreshDateFrom: z.string().optional(),
    refreshDateTo: z.string().optional(),
    articleId: z.string().optional(),
    clusterId: z.string().optional(),
    medium: z.union([z.literal('article'), z.literal('video')]).optional(),
    source: z
      .string()
      .optional()
      .describe("Filter articles from a specific publisher's source domain."),
    sourceGroup: SourceGroupSchema.optional().describe(
      'The source group to retrieve articles from.'
    ),
    excludeSource: z
      .string()
      .optional()
      .describe(
        'Source website domains which should be excluded from the search. Wildcards (* and ?) are supported (e.g. "*.cnn.com").'
      ),
    paywall: z
      .boolean()
      .optional()
      .describe(
        'Filter to show only results where the source has a paywall (true) or does not have a paywall (false).'
      ),
    country: z
      .string()
      .optional()
      .describe('Country code to filter by country.'),
    language: z.string().optional(),
    // The examples below must spell the labels exactly as ArticleLabelSchema
    // does ("Paid News", not "Paid-news"), since this text guides the values
    // a model will produce.
    label: ArticleLabelSchema.optional().describe(
      'Labels to filter by, could be "Opinion", "Paid News", "Non-news", etc. If multiple parameters are passed, they will be applied as OR operations.'
    ),
    excludeLabel: z
      .union([ArticleLabelSchema, z.literal('Low Content')])
      .optional()
      .describe(
        'Exclude results that include specific labels ("Opinion", "Non-news", "Paid News", etc.). You can filter multiple by repeating the parameter.'
      ),
    byline: z.string().optional(),
    topic: z.string().optional(),
    category: CategoriesSchema.optional().describe(
      'Filter by categories. Categories are general themes that the article is about. Examples of categories: Tech, Politics, etc. If multiple parameters are passed, they will be applied as OR operations. Use "none" to search uncategorized articles.'
    ),
    journalistId: z.string().optional(),
    state: z
      .string()
      .optional()
      .describe(
        'Filters articles where a specified state plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the state in question.'
      ),
    city: z
      .string()
      .optional()
      .describe(
        'Filters articles where a specified city plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the urban area in question.'
      ),
    area: z
      .string()
      .optional()
      .describe(
        'Filters articles where a specified area, such as a neighborhood, borough, or district, plays a central role in the content, beyond mere mentions, to ensure the results are deeply relevant to the area in question.'
      ),
    location: z.string().optional(),
    // NOTE(review): `.optional()` wraps `.default()` here (and on `page`), so
    // an omitted value appears to stay `undefined` instead of becoming the
    // default — confirm against Zod. The order is kept because swapping it
    // would change the inferred options type.
    sortBy: SortBySchema.default('relevance')
      .optional()
      .describe('How to sort the article results.'),
    showReprints: z
      .boolean()
      .optional()
      .describe(
        'Whether to return reprints in the response or not. Reprints are usually wired articles from sources like AP or Reuters that are reprinted in multiple sources at the same time. By default, this parameter is "true".'
      ),
    showNumResults: z.boolean().optional(),
    type: z
      .union([z.literal('all'), z.literal('local'), z.literal('world')])
      .optional(),
    linkTo: z.string().optional(),
    reprintGroupId: z.string().optional(),
    personWikidataId: z.array(z.string()).optional(),
    personName: z
      .array(z.string())
      .optional()
      .describe('List of person names for exact matches.'),
    companyId: z.array(z.string()).optional(),
    companyName: z.string().optional().describe('Search by company name.'),
    companyDomain: z
      .array(z.string())
      .optional()
      .describe('Search by company domain.'),
    companySymbol: z
      .array(z.string())
      .optional()
      .describe('Search by company stock ticker symbol.'),
    maxDistance: z.number().optional(),
    lat: z.number().optional(),
    lon: z.number().optional(),
    searchTranslation: z
      .boolean()
      .optional()
      .describe(
        'Expand a query to search the translation, translatedTitle, and translatedDescription fields for non-English articles.'
      ),
    // Pages are zero-based, so 0 (the first page and the declared default)
    // has to validate; `.positive()` would reject it.
    page: z
      .number()
      .int()
      .nonnegative()
      .max(10_000)
      .default(0)
      .optional()
      .describe('Page number of results to return (zero-based)'),
    size: z
      .number()
      .int()
      .positive()
      .max(DEFAULT_PAGE_SIZE)
      .optional()
      .describe('Number of results to return per page')
  })
  export type ArticlesSearchOptions = z.infer<
    typeof ArticlesSearchOptionsSchema
  >

  /**
   * Story search parameters: a subset of the article parameters plus
   * story-specific filters. `sortBy` is redefined with story sort orders.
   */
  export const StoriesSearchOptionsSchema = ArticlesSearchOptionsSchema.pick({
    q: true,
    clusterId: true,
    topic: true,
    category: true,
    from: true,
    to: true,
    state: true,
    city: true,
    area: true,
    showNumResults: true,
    page: true,
    size: true,
    sourceGroup: true,
    personWikidataId: true,
    personName: true,
    companyId: true,
    companyName: true,
    companyDomain: true,
    companySymbol: true
  }).extend({
    name: z.string().optional().describe('Search stories by name.'),
    nameExists: z.boolean().optional(),
    initializedFrom: z.string().optional(),
    initializedTo: z.string().optional(),
    updatedFrom: z.string().optional(),
    updatedTo: z.string().optional(),
    minClusterSize: z.number().optional(),
    maxClusterSize: z.number().optional(),
    showDuplicates: z
      .boolean()
      .optional()
      .describe(
        'Stories are deduplicated by default. If a story is deduplicated, all future articles are merged into the original story. `duplicateOf` field contains the original cluster id. When showDuplicates=true, all stories are shown.'
      ),
    sortBy: z
      .union([
        z.literal('count'),
        z.literal('createdAt'),
        z.literal('updatedAt')
      ])
      .optional()
      .describe('How to sort the results.')
  })
  export type StoriesSearchOptions = z.infer<typeof StoriesSearchOptionsSchema>

  /** People search parameters. */
  export const PeopleSearchOptionsSchema = z.object({
    name: z
      .string()
      .describe(
        'Person name query to search for. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
      ),
    wikidataId: z
      .array(z.string())
      .optional()
      .describe('Search by ID of Wikidata entity.'),
    occupationId: z
      .array(z.string())
      .optional()
      .describe('Search by Wikidata occupation ID.'),
    occupationLabel: z
      .string()
      .optional()
      .describe('Search by occupation name.'),
    size: z
      .number()
      .int()
      .positive()
      .max(DEFAULT_PAGE_SIZE)
      .optional()
      .describe('Number of results to return per page')
  })
  export type PeopleSearchOptions = z.infer<typeof PeopleSearchOptionsSchema>

  /** Company search parameters. */
  export const CompanySearchOptionsSchema = z.object({
    q: z
      .string()
      .optional()
      .describe(
        'Company search query. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
      ),
    name: z
      .string()
      .optional()
      .describe(
        'Search by company name. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
      ),
    industry: z
      .string()
      .optional()
      .describe(
        'Search by company industry. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
      ),
    sector: z
      .string()
      .optional()
      .describe(
        'Search by company sector. It may use boolean operators (AND, OR, NOT) and quotes for exact matching.'
      ),
    id: z.array(z.string()).optional().describe('Search by company ID.'),
    symbol: z
      .array(z.string())
      .optional()
      .describe('Search by company stock ticker symbol.'),
    domain: z
      .array(z.string())
      .optional()
      .describe('Search by company domain.'),
    country: z.string().optional().describe('Search by country.'),
    exchange: z.string().optional().describe('Search by exchange name.'),
    numEmployeesFrom: z
      .number()
      .int()
      .positive()
      .optional()
      .describe('Minimum number of employees.'),
    numEmployeesTo: z
      .number()
      .int()
      .positive()
      .optional()
      .describe('Maximum number of employees.'),
    ipoFrom: z
      .string()
      .optional()
      .describe('Starting IPO date (ISO or "yyyy-mm-dd" format)'),
    ipoTo: z
      .string()
      .optional()
      .describe('Ending IPO date (ISO or "yyyy-mm-dd" format)'),
    size: z
      .number()
      .int()
      .positive()
      .max(DEFAULT_PAGE_SIZE)
      .optional()
      .describe('Number of results to return per page')
  })
  export type CompanySearchOptions = z.infer<typeof CompanySearchOptionsSchema>

  /** Response type of the article search. */
  export type ArticlesSearchResponse = {
    status: number
    numResults: number
    articles: Article[]
  }

  /** A single news article. */
  export type Article = {
    url: string
    authorsByline: string
    articleId: string
    clusterId: string
    source: {
      domain: string
    }
    imageUrl: string
    country: string
    language: string
    pubDate: string
    addDate: string
    refreshDate: string
    score: number
    title: string
    description: string
    content: string
    medium: string
    links: string[]
    labels: string[]
    matchedAuthors: string[]
    claim: string
    verdict: string
    keywords: {
      name: string
      weight: number
    }[]
    topics: {
      name: string
    }[]
    categories: {
      name: string
    }[]
    entities: {
      data: string
      type: string
      mentions: number
    }[]
    sentiment: {
      positive: number
      negative: number
      neutral: number
    }
    summary: string
    translation: string
    locations: string[]
    reprint: boolean
    reprintGroupId: string
    places: null
  }

  /** Response type of the story search. */
  export type StoriesSearchResponse = {
    status: number
    numResults: number
    results: Story[]
  }

  /** A story, i.e. a cluster of related news articles. */
  export type Story = {
    createdAt: string
    updatedAt: string
    initializedAt: string
    id: string
    name: string
    summary: string
    summaryReferences: Array<any>
    keyPoints: Array<{
      point: string
      references: Array<string>
    }>
    sentiment: {
      positive: number
      negative: number
      neutral: number
    }
    uniqueCount: number
    reprintCount: number
    totalCount: number
    countries: Array<{
      name: string
      count: number
    }>
    topCountries: Array<string>
    topics: Array<{
      name: string
      count: number
    }>
    topTopics: Array<{ name: string }>
    categories: Array<{
      name: string
      count: number
    }>
    topCategories: Array<{ name: string }>
    people: Array<{ wikidataId: string; name: string; count: number }>
    topPeople: Array<{ wikidataId: string; name: string }>
    companies: Array<{
      id: string
      name: string
      domains: Array<string>
      symbols: Array<string>
      count: number
    }>
    topCompanies: Array<{
      id: string
      name: string
      domains: Array<string>
      symbols: Array<string>
    }>
    locations: Array<{
      state: string
      city?: string
      area?: string
      county?: string
      count: number
    }>
    topLocations: Array<{
      state: string
      city?: string
      area?: string
      county?: string
    }>
  }

  /** Response type of the people search. */
  export interface PeopleSearchResponse {
    status: number
    numResults: number
    results: Person[]
  }

  /** A person record, keyed by Wikidata id. */
  export interface Person {
    wikidataId: string
    name: string
    gender: Gender
    dateOfBirth: DateOfBirth
    dateOfDeath: any
    description: string
    aliases: string[]
    occupation: Occupation[]
    position: Position[]
    politicalParty: PoliticalParty[]
    image?: Image
    abstract: string
  }

  export interface Gender {
    wikidataId: string
    label: string
  }

  export interface DateOfBirth {
    time: string
    precision: string
  }

  export interface Occupation {
    wikidataId: string
    label: string
  }

  export interface Position {
    wikidataId: string
    label: string
    startTime: any
    endTime: any
    employer: any
  }

  export interface PoliticalParty {
    wikidataId: string
    label: string
    startTime: any
    endTime: any
  }

  export interface Image {
    url: string
  }

  /** Response type of the company search. */
  export interface CompanySearchResponse {
    status: number
    numResults: number
    results: Company[]
  }

  /** A company record. */
  export interface Company {
    id: string
    name: string
    altNames: string[]
    domains: string[]
    monthlyVisits: number
    globalRank?: number
    description: string
    ceo: any
    industry: string
    sector: any
    country: string
    fullTimeEmployees?: number
    address: any
    city: any
    state: any
    zip: any
    logo?: string
    favicon?: string
    isEtf: boolean
    isActivelyTrading: any
    isFund: boolean
    isAdr: boolean
    symbols: any[]
  }
}
/**
* **The intelligent news API**
*
* Real-time global news and web content data from 140,000+ sources.
*
* - search news articles
* - search news stories (clusters of related news articles)
* - search people, companies, topics, and journalists
*
* @see https://www.goperigon.com/products/news-api
*/
export class PerigonClient extends AIFunctionsProvider {
protected readonly ky: KyInstance
protected readonly apiKey: string
/**
 * @param opts.apiKey - Perigon API key; defaults to the `PERIGON_API_KEY`
 * environment variable.
 * @param opts.apiBaseUrl - Base URL for all requests; defaults to
 * `https://api.goperigon.com/v1/`. This option was already part of the
 * accepted options type but used to be ignored.
 * @param opts.timeoutMs - Per-request timeout in milliseconds (default 30000).
 * @param opts.throttle - Whether to apply `perigon.throttle` (default `true`).
 * @param opts.ky - `ky` instance to extend; defaults to the global instance.
 * @throws If no API key is available.
 */
constructor({
  apiKey = getEnv('PERIGON_API_KEY'),
  apiBaseUrl = 'https://api.goperigon.com/v1/',
  timeoutMs = 30_000,
  throttle = true,
  ky = defaultKy
}: {
  apiKey?: string
  apiBaseUrl?: string
  throttle?: boolean
  timeoutMs?: number
  ky?: KyInstance
} = {}) {
  assert(
    apiKey,
    'PerigonClient missing required "apiKey" (defaults to "PERIGON_API_KEY")'
  )
  super()

  this.apiKey = apiKey

  const throttledKy = throttle ? throttleKy(ky, perigon.throttle) : ky

  this.ky = throttledKy.extend({
    prefixUrl: apiBaseUrl,
    timeout: timeoutMs
  })
}
/**
 * Searches news articles via the `all` endpoint.
 *
 * The API key is sent as a search parameter. The page size falls back to
 * `perigon.DEFAULT_PAGE_SIZE` when unset (or 0) and is clamped to
 * `[1, perigon.MAX_PAGE_SIZE]`.
 *
 * @see https://docs.goperigon.com/docs/overview
 * @see https://docs.goperigon.com/reference/all-news
 */
@aiFunction({
  name: 'search_news_articles',
  description:
    'Search for news articles indexed by Perigon. Articles can optionally be filtered by various parameters.',
  inputSchema: perigon.ArticlesSearchOptionsSchema.pick({
    q: true,
    title: true,
    from: true,
    to: true,
    source: true,
    sourceGroup: true,
    excludeSource: true,
    category: true,
    personName: true,
    companyName: true,
    companyDomain: true,
    sortBy: true
  })
})
async searchArticles(opts: perigon.ArticlesSearchOptions) {
  const requestedSize = opts.size || perigon.DEFAULT_PAGE_SIZE
  const cappedSize = Math.min(perigon.MAX_PAGE_SIZE, requestedSize)
  const size = Math.max(1, cappedSize)

  const searchParams = sanitizeSearchParams({
    ...opts,
    apiKey: this.apiKey,
    size
  })

  const request = this.ky.get('all', { searchParams })
  return request.json<perigon.ArticlesSearchResponse>()
}
/**
* @see https://docs.goperigon.com/docs/stories-overview
* @see https://docs.goperigon.com/reference/stories-1
*/
@aiFunction({
name: 'search_news_stories',
description:
'Search for news stories indexed by Perigon. Stories are clusters of related news articles and are useful for finding top stories and trending headlines. Stories can optionally be filtered by various parameters.',
inputSchema: perigon.StoriesSearchOptionsSchema.pick({
q: true,
name: true,
from: true,
to: true,
sourceGroup: true,
category: true,
personName: true,
companyName: true,
companyDomain: true,
sortBy: true
})
})
async searchStories(opts: perigon.StoriesSearchOptions) {
return this.ky
.get('stories/all', {
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
})
})
.json<perigon.StoriesSearchResponse>()
}
/**
* @see https://docs.goperigon.com/docs/people-data
* @see https://docs.goperigon.com/reference/people
*/
@aiFunction({
name: 'search_people',
description: 'Search for well-known people indexed by Perigon.',
inputSchema: perigon.PeopleSearchOptionsSchema
})
async searchPeople(opts: perigon.PeopleSearchOptions) {
return this.ky
.get('people/all', {
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
})
})
.json<perigon.PeopleSearchResponse>()
}
/**
* @see https://docs.goperigon.com/docs/company-data
* @see https://docs.goperigon.com/reference/companies
*/
@aiFunction({
name: 'search_companies',
description:
'Search for companies indexed by Perigon. Includes public and private companies sourced from public records and Wikidata.',
inputSchema: perigon.CompanySearchOptionsSchema
})
async searchCompanies(opts: perigon.CompanySearchOptions) {
return this.ky
.get('companies/all', {
searchParams: sanitizeSearchParams({
...opts,
apiKey: this.apiKey,
size: Math.max(
1,
Math.min(
perigon.MAX_PAGE_SIZE,
opts.size || perigon.DEFAULT_PAGE_SIZE
)
)
})
})
.json<perigon.CompanySearchResponse>()
}
}

Plik diff jest za duży Load Diff

Wyświetl plik

@ -0,0 +1,793 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import type { DeepNullable } from '../types.js'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import {
assert,
getEnv,
pruneUndefined,
sanitizeSearchParams,
throttleKy
} from '../utils.js'
// TODO: improve `domain` validation for fast-fail
/**
 * Types, constants, and zod input schemas for the PredictLeads API client.
 */
export namespace predictleads {
  // Allow up to 20 requests per minute by default.
  export const throttle = pThrottle({
    limit: 20,
    interval: 60 * 1000
  })

  /** Page size used when a caller does not specify `limit`. */
  export const DEFAULT_PAGE_SIZE = 100
  /** Upper bound enforced on `limit` by the params schemas below. */
  export const MAX_PAGE_SIZE = 1000

  /** Metadata block attached to list responses. */
  export type Meta = DeepNullable<{
    count: number
    message?: string | null
    message_type?: string
  }>

  /** Response type used by the follow / unfollow endpoints. */
  export type GenericSuccessResponse = {
    success: {
      type: string
      message: string
    }
  }

  /** Response type used when listing followed companies. */
  export type FollowedCompaniesResponse = {
    data: DeepNullable<
      Array<{
        domain: string
        custom_company_identifier: string | null
      }>
    >
    meta: Meta
  }

  /** Map of relationship name to a reference (`id` + `type`). */
  export type Relationship = Record<
    string,
    {
      data: {
        id: string
        type: string
      }
    }
  >

  /** Event-specific details nested under `Event.attributes.additional_data`. */
  export type AdditionalData = {
    relationships: {
      companies: [string, string]
    }
    date: string
    location: string
    location_data: {
      region?: string
      continent?: string
      country?: string
      state?: string
      zip_code?: string
      city?: string
      fuzzy_match?: boolean
    }
    contact?: string
    job_title?: string
    product?: string
    product_tags?: string[]
    amount?: number
    recognition?: string
    assets?: string
    asset_tags?: string[]
    headcount?: number
    award?: string
    financing_type?: string
    financing_type_tags?: string[]
    funding_round?: string
    division?: string
    conference?: string
    vulnerability?: string
    planning?: boolean
    article_title?: string
    article_sentence?: string
    article_body?: string
    article_source?: string
    article_published_at?: string
    article_image_url?: string
  }

  /** A single record in a `Response`'s `data` array. */
  export type Event = {
    id: string
    type: string
    attributes: {
      categories: string[]
      title: string
      url: string
      found_at: string
      additional_data: AdditionalData
      domain: string
      location: string
      location_data: {
        state: string
        country: string
      }
      company_name: string
      friendly_company_name: string
      // Previously typed as the literal `null`; widened to match
      // `CompanyData.attributes.ticker`.
      ticker: string | null
      meta_title: string
      meta_description: string
      published_at: string
      post_type: string
      post_url: string
      company_domain: string
      fuzzy_match: boolean
    }
    relationships: Relationship
  }

  /** Generic response type shared by most endpoints of the client. */
  export type Response = DeepNullable<{
    data: Event[]
    included: Relationship
    meta: Meta
  }>

  /** A single job opening record. */
  export type JobOpeningData = {
    id: string
    type: string
    attributes: {
      title: string
      url: string
      description: string
      salary: string
      salary_data: {
        salary_low: number
        salary_high: number
        salary_currency: string
        salary_low_usd: number
        salary_high_usd: number
        salary_time_unit: string
      }
      job_opening_closed: boolean
      location: string
      contract_types: string[]
      first_seen_at: string
      last_seen_at: string
      last_processed_at: string
      categories: string[]
      onet_code: string
      additional_data: {
        job_title_seniority: string
        tags: string[]
        location_data: {
          country: string
          city: string
          fuzzy_match: boolean
        }
      }
    }
    relationships: {
      company: {
        data: {
          id: string
          type: string
        }
      }
    }
  }

  /** Company record included alongside job openings. */
  export type CompanyData = {
    id: string
    type: string
    attributes: {
      domain: string
      company_name: string
      ticker: string | null
    }
  }

  /** Response type for listing a company's job openings. */
  export type JobOpeningResponse = DeepNullable<{
    data: JobOpeningData[]
    included: CompanyData[]
    meta: {
      count: number
    }
  }>

  /** Response type for fetching a single job opening (no `meta`). */
  export type JobOpeningByIdResponse = Omit<JobOpeningResponse, 'meta'>

  /**
   * Allowed event categories. Each literal's description ends with the
   * category group in parentheses, e.g. `(leadership)`.
   */
  export const EventCategorySchema = z
    .union([
      z
        .literal('hires')
        .describe(
          'Company hired new executive or senior personnel. (leadership)'
        ),
      z
        .literal('promotes')
        .describe(
          'Company promoted existing executive or senior personnel. (leadership)'
        ),
      z
        .literal('leaves')
        .describe(
          'Executive or senior personnel left the company. (leadership)'
        ),
      z
        .literal('retires')
        .describe(
          'Executive or senior personnel retires from the company. (leadership)'
        ),
      z
        .literal('acquires')
        .describe('Company acquired other company. (acquisition)'),
      z
        .literal('merges_with')
        .describe('Company merges with other company. (acquisition)'),
      z
        .literal('sells_assets_to')
        .describe(
          'Company sells assets (like properties or warehouses) to other company. (acquisition)'
        ),
      z
        .literal('expands_offices_to')
        .describe(
          'Company opens new offices in another town, state, country or continent. (expansion)'
        ),
      z
        .literal('expands_offices_in')
        .describe('Company expands existing offices. (expansion)'),
      z
        .literal('expands_facilities')
        .describe(
          'Company opens new or expands existing facilities like warehouses, data centers, manufacturing plants etc. (expansion)'
        ),
      z
        .literal('opens_new_location')
        .describe(
          'Company opens new service location like hotels, restaurants, bars, hospitals etc. (expansion)'
        ),
      z
        .literal('increases_headcount_by')
        .describe('Company offers new job vacancies. (expansion)'),
      z
        .literal('launches')
        .describe('Company launches new offering. (new_offering)'),
      z
        .literal('integrates_with')
        .describe('Company integrates with other company. (new_offering)'),
      z
        .literal('is_developing')
        .describe(
          'Company begins development of a new offering. (new_offering)'
        ),
      z
        .literal('receives_financing')
        .describe(
          'Company receives investment like venture funding, loan, grant etc. (investment)'
        ),
      z
        .literal('invests_into')
        .describe('Company invests into other company. (investment)'),
      z
        .literal('invests_into_assets')
        .describe(
          'Company invests into assets like property, trucks, facilities etc. (investment)'
        ),
      z
        .literal('goes_public')
        .describe(
          'Company issues shares to the public for the first time. (investment)'
        ),
      z
        .literal('closes_offices_in')
        .describe('Company closes existing offices. (cost_cutting)'),
      z
        .literal('decreases_headcount_by')
        .describe('Company lays off employees. (cost_cutting)'),
      z
        .literal('partners_with')
        .describe('Company partners with other company. (partnership)'),
      z
        .literal('receives_award')
        .describe(
          'Company or person at the company receives an award. (recognition)'
        ),
      z
        .literal('recognized_as')
        .describe(
          'Company or person at the company receives recognition. (recognition)'
        ),
      z
        .literal('signs_new_client')
        .describe('Company signs new client. (contract)'),
      z
        .literal('files_suit_against')
        .describe(
          'Company files suit against other company. (corporate_challenges)'
        ),
      z
        .literal('has_issues_with')
        .describe('Company has vulnerability problems. (corporate_challenges)'),
      z
        .literal('identified_as_competitor_of')
        .describe('New or existing competitor was identified. (relational)')
    ])
    .describe('Event category')
  export type EventCategory = z.infer<typeof EventCategorySchema>

  // Schema fragments shared by the params schemas below. They are kept private
  // to the namespace; the exported schemas and their inferred types are
  // unchanged.

  /** `domain` field: a string of at least 3 characters. */
  const domainSchema = z.string().min(3).describe('domain of the company')

  /** `limit` field: optional positive integer, at most `MAX_PAGE_SIZE`. */
  const limitSchema = z
    .number()
    .int()
    .positive()
    .max(MAX_PAGE_SIZE)
    .default(DEFAULT_PAGE_SIZE)
    .optional()

  /** Input for `get_company`. */
  export const CompanyParamsSchema = z.object({
    domain: domainSchema
  })
  export type CompanyParams = z.infer<typeof CompanyParamsSchema>

  /** Input for `get_company_events`. */
  export const CompanyEventsParamsSchema = z.object({
    domain: domainSchema,
    categories: z.array(EventCategorySchema).optional(),
    found_at_from: z
      .string()
      .optional()
      .describe('Signals found from specified date (ISO 8601).'),
    found_at_until: z
      .string()
      .optional()
      .describe('Signals found until specified date (ISO 8601).'),
    page: z.number().int().positive().default(1).optional(),
    limit: limitSchema,
    with_news_article_bodies: z
      .boolean()
      .optional()
      .describe('Whether or not to include the body contents of news articles.')
  })
  export type CompanyEventsParams = z.infer<typeof CompanyEventsParamsSchema>

  /** Input for `get_company_financing_events`. */
  export const CompanyFinancingEventsParamsSchema = z.object({
    domain: domainSchema
  })
  export type CompanyFinancingEventsParams = z.infer<
    typeof CompanyFinancingEventsParamsSchema
  >

  /** Input for `get_company_job_openings`. */
  export const CompanyJobOpeningsParamsSchema = z.object({
    domain: domainSchema,
    categories: z.array(EventCategorySchema).optional(),
    found_at_from: z
      .string()
      .optional()
      .describe('Signals found from specified date (ISO 8601).'),
    found_at_until: z
      .string()
      .optional()
      .describe('Signals found until specified date (ISO 8601).'),
    limit: limitSchema,
    with_job_descriptions: z
      .boolean()
      .optional()
      .describe('Whether or not to include the full descriptions of the jobs.'),
    with_description_only: z
      .boolean()
      .optional()
      .describe('If set, only returns job openings with descriptions.'),
    with_location_only: z
      .boolean()
      .optional()
      .describe('If set, only returns job openings with locations.'),
    active_only: z
      .boolean()
      .optional()
      .describe(
        'If set, only returns job openings that are not closed, have `last_seen_at` more recent than 5 days and were found in the last year.'
      ),
    not_closed: z
      .boolean()
      .optional()
      .describe(
        'Similar to `active_only`, but without considering `last_seen_at` timestamp.'
      )
  })
  export type CompanyJobOpeningsParams = z.infer<
    typeof CompanyJobOpeningsParamsSchema
  >

  /** Input for `get_company_technologies`. */
  export const CompanyTechnologiesParamsSchema = z.object({
    domain: domainSchema,
    categories: z.array(EventCategorySchema).optional(),
    limit: limitSchema
  })
  export type CompanyTechnologiesParams = z.infer<
    typeof CompanyTechnologiesParamsSchema
  >

  /** Input for `get_company_connections`. */
  export const CompanyConnectionsParamsSchema = z.object({
    domain: domainSchema,
    categories: z.array(EventCategorySchema).optional(),
    limit: limitSchema
  })
  export type CompanyConnectionsParams = z.infer<
    typeof CompanyConnectionsParamsSchema
  >

  /** Input for `get_company_website_evolution`. */
  export const CompanyWebsiteEvolutionParamsSchema = z.object({
    domain: domainSchema,
    limit: limitSchema
  })
  export type CompanyWebsiteEvolutionParams = z.infer<
    typeof CompanyWebsiteEvolutionParamsSchema
  >

  /** Input for `get_company_github_repos`. */
  export const CompanyGitHubReposParamsSchema = z.object({
    domain: domainSchema,
    limit: limitSchema
  })
  export type CompanyGitHubReposParams = z.infer<
    typeof CompanyGitHubReposParamsSchema
  >

  /** Input for `get_company_products`. */
  export const CompanyProductsParamsSchema = z.object({
    domain: domainSchema,
    sources: z.array(z.string()).optional(),
    limit: limitSchema
  })
  export type CompanyProductsParams = z.infer<
    typeof CompanyProductsParamsSchema
  >
}
/**
 * In-depth company data, including signals like fundraising announcements,
 * hiring intent, new customers signed, technologies used, product launches,
 * location expansions, awards received, etc.
 *
 * @see https://predictleads.com
 */
export class PredictLeadsClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiToken: string

  /**
   * @param opts.apiKey - Sent as the `x-api-key` header; defaults to the
   * `PREDICT_LEADS_API_KEY` environment variable.
   * @param opts.apiToken - Sent as the `x-api-token` header; defaults to the
   * `PREDICT_LEADS_API_TOKEN` environment variable.
   * @param opts.apiBaseUrl - Base URL used as the ky `prefixUrl`; defaults to
   * `https://predictleads.com/api`.
   * @param opts.timeoutMs - Per-request timeout in milliseconds.
   * @param opts.throttle - When `true` (the default), requests are routed
   * through `predictleads.throttle`.
   * @param opts.ky - The ky instance to extend; defaults to the stock `ky`.
   */
  constructor({
    apiKey = getEnv('PREDICT_LEADS_API_KEY'),
    apiToken = getEnv('PREDICT_LEADS_API_TOKEN'),
    apiBaseUrl = 'https://predictleads.com/api',
    timeoutMs = 30_000,
    throttle = true,
    ky = defaultKy
  }: {
    apiKey?: string
    apiToken?: string
    apiBaseUrl?: string
    timeoutMs?: number
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'PredictLeadsClient missing required "apiKey" (defaults to "PREDICT_LEADS_API_KEY")'
    )
    assert(
      apiToken,
      'PredictLeadsClient missing required "apiToken" (defaults to "PREDICT_LEADS_API_TOKEN")'
    )
    super()

    this.apiKey = apiKey
    this.apiToken = apiToken

    const throttledKy = throttle ? throttleKy(ky, predictleads.throttle) : ky

    // `apiBaseUrl` used to be accepted by the options type but ignored; it is
    // now honored, with the previous hard-coded URL as its default.
    this.ky = throttledKy.extend({
      prefixUrl: apiBaseUrl,
      timeout: timeoutMs,
      headers: {
        'x-api-key': apiKey,
        'x-api-token': apiToken
      }
    })
  }

  /**
   * Fetches a company by domain (`GET v2/companies/{domain}`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company',
    description:
      'Returns basic information about a company given its `domain` like location, name, stock ticker, description, etc.',
    inputSchema: predictleads.CompanyParamsSchema
  })
  async company(domainOrOpts: string | predictleads.CompanyParams) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky.get(`v2/companies/${domain}`).json<predictleads.Response>()
  }

  /**
   * Lists events for a company (`GET v2/companies/{domain}/events`).
   * `page` defaults to 1 and `limit` to `predictleads.DEFAULT_PAGE_SIZE`.
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_events',
    description:
      'Returns a list of events from news for a given company. Events are found in press releases, industry news, blogs, social media, and other online sources.',
    inputSchema: predictleads.CompanyEventsParamsSchema
  })
  async getCompanyEvents(
    domainOrOpts: string | predictleads.CompanyEventsParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const {
      domain,
      page = 1,
      limit = predictleads.DEFAULT_PAGE_SIZE,
      ...params
    } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/events`, {
        searchParams: sanitizeSearchParams({
          page,
          limit,
          ...params
        })
      })
      .json<predictleads.Response>()
  }

  /** Fetches a single event by id (`GET v2/events/{id}`). */
  async getEventById(id: string) {
    return this.ky.get(`v2/events/${id}`).json<predictleads.Response>()
  }

  /**
   * Lists financing events for a company
   * (`GET v2/companies/{domain}/financing_events`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_financing_events',
    description:
      'Returns a list of financing events for a given company. Financing events include fundraising announcements and quarterly earning reports for public companies. They are sourced from press releases, industry news, blogs, social media, and other online sources.',
    inputSchema: predictleads.CompanyFinancingEventsParamsSchema
  })
  async getCompanyFinancingEvents(
    domainOrOpts: string | predictleads.CompanyFinancingEventsParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/financing_events`)
      .json<predictleads.Response>()
  }

  /**
   * Lists job openings for a company
   * (`GET v2/companies/{domain}/job_openings`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_job_openings',
    description:
      'Returns a list of job openings for a given company. Job openings are found on companies career sites and job boards.',
    inputSchema: predictleads.CompanyJobOpeningsParamsSchema
  })
  async getCompanyJobOpenings(
    domainOrOpts: string | predictleads.CompanyJobOpeningsParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/job_openings`, {
        searchParams: sanitizeSearchParams({
          limit,
          ...params
        })
      })
      .json<predictleads.JobOpeningResponse>()
  }

  /** Fetches a single job opening by id (`GET v2/job_openings/{id}`). */
  async getJobOpeningById(id: string) {
    return this.ky
      .get(`v2/job_openings/${id}`)
      .json<predictleads.JobOpeningByIdResponse>()
  }

  /**
   * Lists technologies for a company
   * (`GET v2/companies/{domain}/technologies`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_technologies',
    description: 'Returns a list of technology providers for a given company.',
    inputSchema: predictleads.CompanyTechnologiesParamsSchema
  })
  async getCompanyTechnologies(
    domainOrOpts: string | predictleads.CompanyTechnologiesParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/technologies`, {
        searchParams: sanitizeSearchParams({
          limit,
          ...params
        })
      })
      .json<predictleads.Response>()
  }

  /**
   * Lists business connections for a company
   * (`GET v2/companies/{domain}/connections`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_connections',
    description:
      'Returns a list of categorized business connections. Business connections can be found via backlinks or logos on /our-customers, /case-studies, /portfolio, /clients etc. pages. Business connections enable you to eg. calculate network health of a company, to build systems when new high value connections are made… Connections can be of many types: partner, vendor, investor, parent…',
    inputSchema: predictleads.CompanyConnectionsParamsSchema
  })
  async getCompanyConnections(
    domainOrOpts: string | predictleads.CompanyConnectionsParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/connections`, {
        searchParams: sanitizeSearchParams({
          limit,
          ...params
        })
      })
      .json<predictleads.Response>()
  }

  /**
   * Fetches website-evolution data for a company
   * (`GET v2/companies/{domain}/website_evolution`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_website_evolution',
    description:
      'Returns insights into how a website has changed over time. E.g., when pages like “Blog”, “Privacy policy”, “Pricing”, “Product”, “API Docs”, “Team”, “Support pages” etc were added. This can serve as a proxy to how quickly a website is growing, to determine the growth stage they are at and also to help segment websites.',
    inputSchema: predictleads.CompanyWebsiteEvolutionParamsSchema
  })
  async getCompanyWebsiteEvolution(
    domainOrOpts: string | predictleads.CompanyWebsiteEvolutionParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/website_evolution`, {
        searchParams: sanitizeSearchParams({ limit, ...params })
      })
      .json<predictleads.Response>()
  }

  /**
   * Fetches GitHub repository data for a company
   * (`GET v2/companies/{domain}/github_repositories`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_github_repos',
    description:
      'Returns insights into how frequently a company is contributing to its public GitHub repositories.',
    inputSchema: predictleads.CompanyGitHubReposParamsSchema
  })
  async getCompanyGitHubRepositories(
    domainOrOpts: string | predictleads.CompanyGitHubReposParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/github_repositories`, {
        searchParams: sanitizeSearchParams({ limit, ...params })
      })
      .json<predictleads.Response>()
  }

  /**
   * Lists products for a company (`GET v2/companies/{domain}/products`).
   *
   * @param domainOrOpts - Either the domain itself or an options object.
   */
  @aiFunction({
    name: 'get_company_products',
    description:
      'Returns what kind of products / solutions / features a company is offering.',
    inputSchema: predictleads.CompanyProductsParamsSchema
  })
  async getCompanyProducts(
    domainOrOpts: string | predictleads.CompanyProductsParams
  ) {
    const opts =
      typeof domainOrOpts === 'string' ? { domain: domainOrOpts } : domainOrOpts
    const { domain, limit = predictleads.DEFAULT_PAGE_SIZE, ...params } = opts
    assert(domain, 'Missing required company "domain"')

    return this.ky
      .get(`v2/companies/${domain}/products`, {
        searchParams: sanitizeSearchParams({
          limit,
          ...params
        })
      })
      .json<predictleads.Response>()
  }

  /** Queries `GET v2/discover/startup_platform/jobs_hn`. */
  async discoverStartupJobsHN(params?: {
    post_datetime_from?: string
    post_datetime_until?: string
    min_score?: string
    limit?: string
  }) {
    return this.ky
      .get(`v2/discover/startup_platform/jobs_hn`, {
        searchParams: params
      })
      .json<predictleads.Response>()
  }

  /** Queries `GET v2/discover/startup_platform/show_hn`. */
  async discoverStartupShowHN(params?: {
    post_datetime_from?: string
    post_datetime_until?: string
    min_score?: string
    limit?: string
  }) {
    return this.ky
      .get(`v2/discover/startup_platform/show_hn`, {
        searchParams: params
      })
      .json<predictleads.Response>()
  }

  // --------------------------------------------------------------------------
  // Stateful endpoints which should generally not be used as AI functions.
  // --------------------------------------------------------------------------

  /**
   * Follows a company (`POST v2/companies/{domain}/follow`).
   *
   * @param domain - Domain of the company to follow.
   * @param customCompanyIdentifier - Optional identifier, sent in the JSON
   * body as `custom_company_identifier` (the snake_case key that
   * `FollowedCompaniesResponse` also uses); omitted when undefined.
   */
  async followCompany(domain: string, customCompanyIdentifier?: string) {
    return this.ky
      .post(`v2/companies/${domain}/follow`, {
        json: pruneUndefined({
          custom_company_identifier: customCompanyIdentifier
        })
      })
      .json<predictleads.GenericSuccessResponse>()
  }

  /**
   * Lists followed companies (`GET v2/followings`).
   *
   * @param limit - Maximum number of entries to request.
   */
  async getFollowingCompanies(limit: number = predictleads.DEFAULT_PAGE_SIZE) {
    return this.ky
      .get(`v2/followings`, {
        searchParams: sanitizeSearchParams({ limit })
      })
      .json<predictleads.FollowedCompaniesResponse>()
  }

  /**
   * Unfollows a company (`POST v2/companies/{domain}/unfollow`).
   *
   * @param domain - Domain of the company to unfollow.
   * @param customCompanyIdentifier - Optional identifier, sent in the JSON
   * body as `custom_company_identifier`; omitted when undefined.
   */
  async unfollowCompany(domain: string, customCompanyIdentifier?: string) {
    return this.ky
      .post(`v2/companies/${domain}/unfollow`, {
        json: pruneUndefined({
          custom_company_identifier: customCompanyIdentifier
        })
      })
      .json<predictleads.GenericSuccessResponse>()
  }
}

Plik diff jest za duży Load Diff

Wyświetl plik

@ -0,0 +1,116 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, omit } from '../utils.js'
/** Types for the scraper API client. */
export namespace scraper {
/**
 * Full result of scraping a page: page metadata plus the content in several
 * representations (main-content HTML, raw HTML, markdown, and plain text).
 */
export type ScrapeResult = {
author: string
byline: string
description: string
imageUrl: string
lang: string
length: number
logoUrl: string
publishedTime: string
siteName: string
title: string
/** The HTML for the main content of the page. */
content: string
/** The raw HTML response from the server. */
rawHtml: string
/** The text for the main content of the page in markdown format. */
markdownContent: string
/** The text for the main content of the page. */
textContent: string
}
}
/**
 * Client for a single-endpoint website scraping API. A scrape yields the HTML,
 * markdown, and plaintext of the page's main body content, together with
 * metadata such as title and description.
 *
 * The service tries the simplest and fastest methods first, and falls back to
 * slower proxies and JavaScript rendering if needed.
 *
 * @note This service is currently available only via a closed beta.
 */
export class ScraperClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiBaseUrl - Base URL of the scraper service; defaults to the
   * `SCRAPER_API_BASE_URL` environment variable and is asserted to be set.
   * @param opts.ky - The ky instance to extend; defaults to the stock `ky`.
   */
  constructor({
    apiBaseUrl = getEnv('SCRAPER_API_BASE_URL'),
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiBaseUrl,
      'ScraperClient missing required "apiBaseUrl" (defaults to "SCRAPER_API_BASE_URL")'
    )
    super()

    this.apiBaseUrl = apiBaseUrl
    this.ky = ky.extend({ prefixUrl: this.apiBaseUrl })
  }

  /**
   * Scrapes one URL via `POST scrape` and returns the result with the content
   * representations that do not match the requested `format` removed.
   *
   * @param urlOrOpts - Either the URL itself or an options object. `format`
   * defaults to `'markdown'` and `timeoutMs` to 60 seconds; neither is
   * included in the request body.
   * @returns The scrape result minus the non-selected content fields (the
   * result is returned untouched for an unrecognized `format`).
   */
  @aiFunction({
    name: 'scrape_url',
    description: 'Scrapes the content of a single URL.',
    inputSchema: z.object({
      url: z.string().url().describe('The URL of the web page to scrape'),
      format: z
        .enum(['html', 'markdown', 'plaintext'])
        .default('markdown')
        .optional()
        .describe(
          'Whether to return the content as HTML, markdown, or plaintext.'
        )
    })
  })
  async scrapeUrl(
    urlOrOpts:
      | string
      | {
          url: string
          format?: 'html' | 'markdown' | 'plaintext'
          timeoutMs?: number
        }
  ): Promise<Partial<scraper.ScrapeResult>> {
    const {
      format = 'markdown',
      timeoutMs = 60_000,
      ...requestBody
    } = typeof urlOrOpts === 'string' ? { url: urlOrOpts } : urlOrOpts

    const pending = this.ky.post('scrape', {
      timeout: timeoutMs,
      json: requestBody
    })
    const scraped = await pending.json<scraper.ScrapeResult>()

    // Keep only the representation the caller asked for.
    if (format === 'html') {
      return omit(scraped, 'markdownContent', 'textContent', 'rawHtml')
    }
    if (format === 'markdown') {
      return omit(scraped, 'textContent', 'rawHtml', 'content')
    }
    if (format === 'plaintext') {
      return omit(scraped, 'markdownContent', 'rawHtml', 'content')
    }

    return scraped
  }
}

Wyświetl plik

@ -0,0 +1,354 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, omit, pick, pruneUndefined } from '../utils.js'
/** Zod schemas and types for the Searxng search client. */
export namespace searxng {
/** Search categories accepted by `SearchOptionsSchema.categories`. */
export const SearchCategorySchema = z.enum([
'general',
'images',
'videos',
'news',
'map',
'music',
'it',
'science',
'files',
'social media'
])
export type SearchCategory = z.infer<typeof SearchCategorySchema>
/** Search engine names accepted by `SearchOptionsSchema.engines`. */
export const SearchEngineSchema = z.enum([
'9gag',
'annas archive',
'apk mirror',
'apple app store',
'ahmia',
'anaconda',
'arch linux wiki',
'artic',
'arxiv',
'ask',
'bandcamp',
'wikipedia',
'bilibili',
'bing',
'bing images',
'bing news',
'bing videos',
'bitbucket',
'bpb',
'btdigg',
'ccc-tv',
'openverse',
'chefkoch',
'crossref',
'crowdview',
'yep',
'yep images',
'yep news',
'curlie',
'currency',
'bahnhof',
'deezer',
'destatis',
'deviantart',
'ddg definitions',
'docker hub',
'erowid',
'wikidata',
'duckduckgo',
'duckduckgo images',
'duckduckgo videos',
'duckduckgo news',
'duckduckgo weather',
'apple maps',
'emojipedia',
'tineye',
'etymonline',
'1x',
'fdroid',
'flickr',
'free software directory',
'frinkiac',
'fyyd',
'genius',
'gentoo',
'gitlab',
'github',
'codeberg',
'goodreads',
'google',
'google images',
'google news',
'google videos',
'google scholar',
'google play apps',
'google play movies',
'material icons',
'gpodder',
'habrahabr',
'hackernews',
'hoogle',
'imdb',
'imgur',
'ina',
'invidious',
'jisho',
'kickass',
'lemmy communities',
'lemmy users',
'lemmy posts',
'lemmy comments',
'library genesis',
'z-library',
'library of congress',
'lingva',
'lobste.rs',
'mastodon users',
'mastodon hashtags',
'mdn',
'metacpan',
'mixcloud',
'mozhi',
'mwmbl',
'npm',
'nyaa',
'mankier',
'odysee',
'openairedatasets',
'openairepublications',
'openstreetmap',
'openrepos',
'packagist',
'pdbe',
'photon',
'pinterest',
'piped',
'piped.music',
'piratebay',
'podcastindex',
'presearch',
'presearch images',
'presearch videos',
'presearch news',
'pub.dev',
'pubmed',
'pypi',
'qwant',
'qwant news',
'qwant images',
'qwant videos',
'radio browser',
'reddit',
'rottentomatoes',
'sepiasearch',
'soundcloud',
'stackoverflow',
'askubuntu',
'internetarchivescholar',
'superuser',
'searchcode code',
'semantic scholar',
'startpage',
'tokyotoshokan',
'solidtorrents',
'tagesschau',
'tmdb',
'torch',
'unsplash',
'yandex music',
'yahoo',
'yahoo news',
'youtube',
'dailymotion',
'vimeo',
'wiby',
'alexandria',
'wikibooks',
'wikinews',
'wikiquote',
'wikisource',
'wikispecies',
'wiktionary',
'wikiversity',
'wikivoyage',
'wikicommons.images',
'wolframalpha',
'dictzone',
'mymemory translated',
'1337x',
'duden',
'seznam',
'mojeek',
'moviepilot',
'naver',
'rubygems',
'peertube',
'mediathekviewweb',
'yacy',
'yacy images',
'rumble',
'livespace',
'wordnik',
'woxikon.de synonyme',
'seekr news',
'seekr images',
'seekr videos',
'sjp.pwn',
'stract',
'svgrepo',
'tootfinder',
'wallhaven',
'wikimini',
'wttr.in',
'yummly',
'brave',
'brave.images',
'brave.videos',
'brave.news',
'lib.rs',
'sourcehut',
'goo',
'bt4g',
'pkg.go.dev'
])
export type SearchEngine = z.infer<typeof SearchEngineSchema>
/**
 * Input schema for a search: a required `query` plus optional category,
 * engine, language, and page-number filters.
 */
export const SearchOptionsSchema = z.object({
query: z.string().describe('search query'),
categories: z
.array(SearchCategorySchema)
.optional()
.describe(
'narrows the search to only use search engines in specific categories'
),
engines: z
.array(SearchEngineSchema)
.optional()
.describe('narrows the search to only use specific search engines'),
language: z.string().optional(),
pageno: z.number().int().optional()
})
export type SearchOptions = z.infer<typeof SearchOptionsSchema>
/** A single search result; only `title` and `url` are required. */
export interface SearchResult {
title: string
url: string
img_src?: string
thumbnail_src?: string
thumbnail?: string
content?: string
author?: string
iframe_src?: string
category?: SearchCategory
engine?: SearchEngine
publishedDate?: string
}
/** Search response: the results, suggestions, and the query string. */
export interface SearchResponse {
results: SearchResult[]
suggestions: string[]
query: string
}
}
/**
 * Client for Searxng, an open source meta search engine capable of searching
 * across many different sources and search engines.
 *
 * The most important search engines are:
 *
 * - "reddit" (Reddit posts)
 * - "google" (Google web search)
 * - "google news" (Google News search)
 * - "brave" (Brave web search)
 * - "arxiv" (academic papers)
 * - "genius" (Genius.com for song lyrics)
 * - "imdb" (movies and TV shows)
 * - "hackernews" (Hacker News)
 * - "wikidata" (Wikidata)
 * - "wolframalpha" (Wolfram Alpha)
 * - "youtube" (YouTube videos)
 * - "github" (GitHub code and repositories)
 *
 * @see https://docs.searxng.org
 *
 * NOTE: You'll need to run a local instance of Searxng to use this client.
 *
 * See [perplexica](https://github.com/ItzCrazyKns/Perplexica/blob/master/docker-compose.yaml) for an example of how to set this up.
 */
export class SearxngClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiBaseUrl - Base URL of the Searxng instance; defaults to the
   * `SEARXNG_API_BASE_URL` environment variable and is asserted to be set.
   * @param opts.ky - The ky instance to extend; defaults to the stock `ky`.
   */
  constructor({
    apiBaseUrl = getEnv('SEARXNG_API_BASE_URL'),
    ky = defaultKy
  }: {
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiBaseUrl,
      'SearxngClient missing required "apiBaseUrl" (defaults to "SEARXNG_API_BASE_URL")'
    )
    super()

    this.apiBaseUrl = apiBaseUrl
    this.ky = ky.extend({ prefixUrl: apiBaseUrl })
  }

  /**
   * Runs a search via `GET search` with `format=json`.
   *
   * `query` is sent as `q`; `categories` and `engines` are comma-joined.
   * Each result has its `parsed_url`, `engines`, `positions`, and `template`
   * fields removed, and only `results`, `suggestions`, and `query` are kept
   * from the response.
   */
  @aiFunction({
    name: 'searxng',
    description: `Searches across multiple search engines using a local instance of Searxng. To search only specific engines, use the \`engines\` parameter.
The most important search engines are:
- "reddit" (Reddit posts)
- "google" (Google web search)
- "google news" (Google News search)
- "brave" (Brave web search)
- "arxiv" (academic papers)
- "genius" (Genius.com for song lyrics)
- "imdb" (movies and TV shows)
- "hackernews" (Hacker News)
- "wikidata" (Wikidata)
- "wolframalpha" (Wolfram Alpha)
- "youtube" (YouTube videos)
- "github" (GitHub code and repositories)
`,
    inputSchema: searxng.SearchOptionsSchema
  })
  async search({
    query,
    ...opts
  }: searxng.SearchOptions): Promise<searxng.SearchResponse> {
    const searchParams = pruneUndefined({
      ...opts,
      q: query,
      categories: opts.categories?.join(','),
      engines: opts.engines?.join(','),
      format: 'json'
    })

    const response = await this.ky
      .get('search', { searchParams })
      .json<searxng.SearchResponse>()

    // Drop per-result fields that are not part of `searxng.SearchResult`.
    const toSearchResult = (raw: any) =>
      omit(
        raw,
        'parsed_url',
        'engines',
        'positions',
        'template'
      ) as searxng.SearchResult

    response.results = response.results?.map(toSearchResult)

    return pick(response, 'results', 'suggestions', 'query')
  }
}

Wyświetl plik

@ -0,0 +1,703 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv } from '../utils.js'
/**
* Request and response types for SerpApi's Google search.
*
* These mirror the types exported by the `serpapi` package, which we're
* not using directly because it is bloated and has compatibility issues.
*/
export namespace serpapi {
/** Default base URL used for SerpApi requests. */
export const API_BASE_URL = 'https://serpapi.com'
/**
* Fields shared by SerpApi responses.
*
* `P` is the engine-specific parameter type; it is echoed back under
* `search_parameters` together with the base parameters, minus the
* `api_key`, `no_cache`, `async` and `timeout` keys.
*/
export type BaseResponse<P = Record<string | number | symbol, never>> = {
search_metadata: {
id: string
// The literal members only document known values; the union is effectively `string`.
status: string | 'Queued' | 'Processing' | 'Success'
json_endpoint: string
created_at: string
processed_at: string
raw_html_file: string
total_time_taken: number
}
search_parameters: {
engine: string
} & Omit<BaseParameters & P, 'api_key' | 'no_cache' | 'async' | 'timeout'>
serpapi_pagination?: {
next: string
}
pagination?: {
next: string
}
// Any other top-level key is allowed and untyped.
[key: string]: any
}
/** Request parameters that are not specific to a single search engine. */
export type BaseParameters = {
/**
* Parameter defines the device to use to get the results. It can be set to
* `desktop` (default) to use a regular browser, `tablet` to use a tablet browser
* (currently using iPads), or `mobile` to use a mobile browser (currently
* using iPhones).
*/
device?: 'desktop' | 'tablet' | 'mobile'
/**
* Parameter will force SerpApi to fetch the Google results even if a cached
* version is already present. A cache is served only if the query and all
* parameters are exactly the same. Cache expires after 1h. Cached searches
* are free, and are not counted towards your searches per month. It can be set
* to `false` (default) to allow results from the cache, or `true` to disallow
* results from the cache. `no_cache` and `async` parameters should not be used together.
*/
no_cache?: boolean
/**
* Parameter defines the way you want to submit your search to SerpApi. It can
* be set to `false` (default) to open an HTTP connection and keep it open until
* you got your search results, or `true` to just submit your search to SerpApi
* and retrieve them later. In this case, you'll need to use our
* [Searches Archive API](https://serpapi.com/search-archive-api) to retrieve
* your results. `async` and `no_cache` parameters should not be used together.
* `async` should not be used on accounts with
* [Ludicrous Speed](https://serpapi.com/plan) enabled.
*/
async?: boolean
/**
* Parameter defines the SerpApi private key to use.
*/
api_key?: string | null
/**
* Specify the client-side timeout of the request. In milliseconds.
*/
timeout?: number
}
/** Request parameters for the Google search engine; only `q` is required. */
export type GoogleParameters = BaseParameters & {
/**
* Search Query
* Parameter defines the query you want to search. You can use anything that you
* would use in a regular Google search. e.g. `inurl:`, `site:`, `intitle:`. We
* also support advanced search query parameters such as as_dt and as_eq. See the
* [full list](https://serpapi.com/advanced-google-query-parameters) of supported
* advanced search query parameters.
*/
q: string
/**
* Location
* Parameter defines from where you want the search to originate. If several
* locations match the location requested, we'll pick the most popular one. Head to
* the [/locations.json API](https://serpapi.com/locations-api) if you need more
* precise control. location and uule parameters can't be used together. Avoid
* utilizing location when setting the location outside the U.S. when using Google
* Shopping and/or Google Product API.
*/
location?: string
/**
* Encoded Location
* Parameter is the Google encoded location you want to use for the search. uule
* and location parameters can't be used together.
*/
uule?: string
/**
* Google Place ID
* Parameter defines the id (`CID`) of the Google My Business listing you want to
* scrape. Also known as Google Place ID.
*/
ludocid?: string
/**
* Additional Google Place ID
* Parameter that you might have to use to force the knowledge graph map view to
* show up. You can find the lsig ID by using our [Local Pack
* API](https://serpapi.com/local-pack) or [Places Results
* API](https://serpapi.com/places-results).
* lsig ID is also available via a redirect Google uses within [Google My
* Business](https://www.google.com/business/).
*/
lsig?: string
/**
* Google Knowledge Graph ID
* Parameter defines the id (`KGMID`) of the Google Knowledge Graph listing you
* want to scrape. Also known as Google Knowledge Graph ID. Searches with kgmid
* parameter will return results for the originally encrypted search parameters.
* For some searches, kgmid may override all other parameters except start, and num
* parameters.
*/
kgmid?: string
/**
* Google Cached Search Parameters ID
* Parameter defines the cached search parameters of the Google Search you want to
* scrape. Searches with si parameter will return results for the originally
* encrypted search parameters. For some searches, si may override all other
* parameters except start, and num parameters. si can be used to scrape Google
* Knowledge Graph Tabs.
*/
si?: string
/**
* Domain
* Parameter defines the Google domain to use. It defaults to `google.com`. Head to
* the [Google domains page](https://serpapi.com/google-domains) for a full list of
* supported Google domains.
*/
google_domain?: string
/**
* Country
* Parameter defines the country to use for the Google search. It's a two-letter
* country code. (e.g., `us` for the United States, `uk` for United Kingdom, or
* `fr` for France). Head to the [Google countries
* page](https://serpapi.com/google-countries) for a full list of supported Google
* countries.
*/
gl?: string
/**
* Language
* Parameter defines the language to use for the Google search. It's a two-letter
* language code. (e.g., `en` for English, `es` for Spanish, or `fr` for French).
* Head to the [Google languages page](https://serpapi.com/google-languages) for a
* full list of supported Google languages.
*/
hl?: string
/**
* Set Multiple Languages
* Parameter defines one or multiple languages to limit the search to. It uses
* `lang_{two-letter language code}` to specify languages and `|` as a delimiter.
* (e.g., `lang_fr|lang_de` will only search French and German pages). Head to the
* [Google lr languages page](https://serpapi.com/google-lr-languages) for a full
* list of supported languages.
*/
lr?: string
/**
* as_dt
* Parameter controls whether to include or exclude results from the site named in
* the as_sitesearch parameter.
*/
as_dt?: string
/**
* as_epq
* Parameter identifies a phrase that all documents in the search results must
* contain. You can also use the [phrase
* search](https://developers.google.com/custom-search/docs/xml_results#PhraseSearchqt)
* query term to search for a phrase.
*/
as_epq?: string
/**
* as_eq
* Parameter identifies a word or phrase that should not appear in any documents in
* the search results. You can also use the [exclude
* query](https://developers.google.com/custom-search/docs/xml_results#Excludeqt)
* term to ensure that a particular word or phrase will not appear in the documents
* in a set of search results.
*/
as_eq?: string
/**
* as_lq
* Parameter specifies that all search results should contain a link to a
* particular URL. You can also use the
* [link:](https://developers.google.com/custom-search/docs/xml_results#BackLinksqt)
* query term for this type of query.
*/
as_lq?: string
/**
* as_nlo
* Parameter specifies the starting value for a search range. Use as_nlo and as_nhi
* to append an inclusive search range.
*/
as_nlo?: string
/**
* as_nhi
* Parameter specifies the ending value for a search range. Use as_nlo and as_nhi
* to append an inclusive search range.
*/
as_nhi?: string
/**
* as_oq
* Parameter provides additional search terms to check for in a document, where
* each document in the search results must contain at least one of the additional
* search terms. You can also use the [Boolean
* OR](https://developers.google.com/custom-search/docs/xml_results#BooleanOrqt)
* query term for this type of query.
*/
as_oq?: string
/**
* as_q
* Parameter provides search terms to check for in a document. This parameter is
* also commonly used to allow users to specify additional terms to search for
* within a set of search results.
*/
as_q?: string
/**
* as_qdr
* Parameter requests search results from a specified time period (quick date
* range). The following values are supported:
* `d[number]`: requests results from the specified number of past days. Example
* for the past 10 days: `as_qdr=d10`
* `w[number]`: requests results from the specified number of past weeks.
* `m[number]`: requests results from the specified number of past months.
* `y[number]`: requests results from the specified number of past years. Example
* for the past year: `as_qdr=y`
*/
as_qdr?: string
/**
* as_rq
* Parameter specifies that all search results should be pages that are related to
* the specified URL. The parameter value should be a URL. You can also use the
* [related:](https://developers.google.com/custom-search/docs/xml_results#RelatedLinksqt)
* query term for this type of query.
*/
as_rq?: string
/**
* as_sitesearch
* Parameter allows you to specify that all search results should be pages from a
* given site. By setting the as_dt parameter, you can also use it to exclude pages
* from a given site from your search results.
*/
as_sitesearch?: string
/**
* Advanced Search Parameters
* (to be searched) parameter defines advanced search parameters that aren't
* possible in the regular query field. (e.g., advanced search for patents, dates,
* news, videos, images, apps, or text contents).
*/
tbs?: string
/**
* Adult Content Filtering
* Parameter defines the level of filtering for adult content. It can be set to
* `active`, or `off` (default).
*/
safe?: string
/**
* Exclude Auto-corrected Results
* Parameter defines the exclusion of results from an auto-corrected query that is
* spelled wrong. It can be set to `1` to exclude these results, or `0` to include
* them (default).
*/
nfpr?: string
/**
* Results Filtering
* Parameter defines if the filters for 'Similar Results' and 'Omitted Results' are
* on or off. It can be set to `1` (default) to enable these filters, or `0` to
* disable these filters.
*/
filter?: string
/**
* Search Type
* (to be matched) parameter defines the type of search you want to do.
* It can be set to:
* `(no tbm parameter)`: regular Google Search,
* `isch`: [Google Images API](https://serpapi.com/images-results),
* `lcl` - [Google Local API](https://serpapi.com/local-results)
* `vid`: [Google Videos API](https://serpapi.com/videos-results),
* `nws`: [Google News API](https://serpapi.com/news-results),
* `shop`: [Google Shopping API](https://serpapi.com/shopping-results),
* or any other Google service.
*/
tbm?: string
/**
* Result Offset
* Parameter defines the result offset. It skips the given number of results. It's
* used for pagination. (e.g., `0` (default) is the first page of results, `10` is
* the 2nd page of results, `20` is the 3rd page of results, etc.).
* Google Local Results only accepts multiples of `20`(e.g. `20` for the second
* page results, `40` for the third page results, etc.) as the start value.
*/
start?: number
/**
* Number of Results
* Parameter defines the maximum number of results to return. (e.g., `10` (default)
* returns 10 results, `40` returns 40 results, and `100` returns 100 results).
*/
num?: number
/**
* Page Number (images)
* Parameter defines the page number for [Google
* Images](https://serpapi.com/images-results). There are 100 images per page. This
* parameter is equivalent to start (offset) = ijn * 100. This parameter works only
* for [Google Images](https://serpapi.com/images-results) (set tbm to `isch`).
*/
ijn?: string
}
/**
* Response shape of a Google search; this is the type `SerpAPIClient.search`
* parses its JSON response as.
*/
export interface SearchResult extends BaseResponse<GoogleParameters> {
search_metadata: SearchMetadata
search_parameters: SearchParameters
search_information: SearchInformation
local_map?: LocalMap
local_results?: LocalResults
answer_box?: AnswerBox
knowledge_graph?: KnowledgeGraph
inline_images?: InlineImage[]
inline_people_also_search_for?: InlinePeopleAlsoSearchFor[]
related_questions?: SearchResultRelatedQuestion[]
organic_results?: OrganicResult[]
related_searches?: RelatedSearch[]
pagination: Pagination
serpapi_pagination: Pagination
twitter_results?: TwitterResults
}
export interface TwitterResults {
title: string
link: string
displayed_link: string
tweets: Tweet[]
}
export interface Tweet {
link: string
snippet: string
published_date: string
}
export interface AnswerBox {
type: string
title: string
link: string
displayed_link: string
snippet: string
snippet_highlighted_words: string[]
images: string[]
about_this_result: AboutThisResult
about_page_link: string
cached_page_link: string
}
export interface InlineImage {
link: string
source: string
thumbnail: string
original: string
source_name: string
title?: string
}
export interface InlinePeopleAlsoSearchFor {
title: string
items: SearchItem[]
see_more_link: string
see_more_serpapi_link: string
}
export interface SearchItem {
name: string
image: string
link: string
serpapi_link: string
}
export interface KnowledgeGraph {
type: string
kgmid: string
knowledge_graph_search_link: string
serpapi_knowledge_graph_search_link: string
header_images: HeaderImage[]
description: string
source: Source
buttons: Button[]
people_also_search_for: SearchItem[]
people_also_search_for_link: string
people_also_search_for_stick: string
list: { [key: string]: string[] }
}
export interface Button {
text: string
subtitle: string
title: string
link: string
displayed_link: string
snippet?: string
snippet_highlighted_words?: string[]
answer?: string
thumbnail: string
search_link: string
serpapi_search_link: string
date?: string
list?: string[]
}
export interface HeaderImage {
image: string
source: string
}
export interface Source {
name: string
link: string
}
export interface LocalMap {
link: string
image: string
gps_coordinates: LocalMapGpsCoordinates
}
export interface LocalMapGpsCoordinates {
latitude: number
longitude: number
altitude: number
}
export interface LocalResults {
places: Place[]
more_locations_link: string
}
export interface Place {
position: number
title: string
rating?: number
reviews_original?: string
reviews?: number
place_id: string
place_id_search: string
lsig: string
thumbnail: string
gps_coordinates: PlaceGpsCoordinates
service_options: ServiceOptions
address?: string
type?: string
hours?: string
}
export interface PlaceGpsCoordinates {
latitude: number
longitude: number
}
export interface ServiceOptions {
dine_in?: boolean
takeout: boolean
no_delivery?: boolean
}
/** A single entry of `SearchResult.organic_results`. */
export interface OrganicResult {
position: number
title: string
link: string
displayed_link: string
thumbnail?: string
favicon?: string
snippet: string
snippet_highlighted_words: string[]
sitelinks?: Sitelinks
rich_snippet?: RichSnippet
about_this_result: AboutThisResult
cached_page_link: string
related_pages_link?: string
source: string
related_results?: RelatedResult[]
date?: string
related_questions?: OrganicResultRelatedQuestion[]
}
export interface AboutThisResult {
keywords: string[]
languages: string[]
regions: string[]
}
export interface OrganicResultRelatedQuestion {
question: string
snippet: string
snippet_links: SnippetLink[]
}
export interface SnippetLink {
text: string
link: string
}
export interface RelatedResult {
position: number
title: string
link: string
displayed_link: string
snippet: string
snippet_highlighted_words: string[]
about_this_result: AboutThisResult
cached_page_link: string
}
export interface RichSnippet {
bottom: Bottom
}
export interface Bottom {
extensions?: string[]
questions?: string[]
}
export interface Sitelinks {
inline: Inline[]
}
export interface Inline {
title: string
link: string
}
export interface Pagination {
current: number
next: string
other_pages: { [key: string]: string }
next_link?: string
}
export interface SearchResultRelatedQuestion {
question: string
snippet: string
title: string
link: string
displayed_link: string
thumbnail: string
next_page_token: string
serpapi_link: string
date?: string
}
export interface RelatedSearch {
query: string
link: string
}
export interface SearchInformation {
organic_results_state: string
query_displayed: string
total_results: number
time_taken_displayed: number
menu_items: MenuItem[]
}
export interface MenuItem {
position: number
title: string
link: string
serpapi_link?: string
}
export interface SearchMetadata {
id: string
status: string
json_endpoint: string
created_at: string
processed_at: string
google_url: string
raw_html_file: string
total_time_taken: number
}
export interface SearchParameters {
engine: string
q: string
google_domain: string
device?: 'desktop' | 'tablet' | 'mobile'
}
/**
* Google parameters that can be preset on the client; every field is optional
* and the per-request query `q` is excluded.
*/
export type ClientParams = Partial<Omit<GoogleParameters, 'q'>>
}
/**
 * Lightweight wrapper around SerpAPI for Google search.
 *
 * @see https://serpapi.com/search-api
 */
export class SerpAPIClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string
  protected readonly params: serpapi.ClientParams

  /**
   * @param opts.apiKey - SerpApi key. Falls back to the `SERPAPI_API_KEY` env
   * var and then to `SERP_API_KEY`; construction fails if none is set.
   * @param opts.apiBaseUrl - Base URL of the API (defaults to
   * `serpapi.API_BASE_URL`).
   * @param opts.ky - `ky` instance to extend.
   * @param opts.params - Every remaining option is kept as a default Google
   * parameter and merged into each search request.
   */
  constructor({
    apiKey = getEnv('SERPAPI_API_KEY') ?? getEnv('SERP_API_KEY'),
    apiBaseUrl = serpapi.API_BASE_URL,
    ky = defaultKy,
    ...params
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } & serpapi.ClientParams = {}) {
    assert(
      apiKey,
      'SerpAPIClient missing required "apiKey" (defaults to "SERPAPI_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl
    this.params = params

    this.ky = ky.extend({
      prefixUrl: this.apiBaseUrl
    })
  }

  /**
   * Performs a Google search through SerpApi.
   *
   * @param queryOrOpts - Either the search query itself or a full set of
   * Google parameters. Per-call parameters override the client defaults, as
   * well as the built-in `engine` and `api_key` values.
   * @returns The parsed JSON response.
   */
  @aiFunction({
    name: 'serpapi_google_search',
    description:
      'Uses Google Search to return the most relevant web pages for a given query. Useful for finding up-to-date news and information about any topic.',
    inputSchema: z.object({
      q: z.string().describe('search query'),
      num: z
        .number()
        .int()
        .positive()
        .default(5)
        .optional()
        .describe('number of results to return')
    })
  })
  async search(queryOrOpts: string | serpapi.GoogleParameters) {
    const { timeout: clientTimeout, ...clientParams } = this.params

    // `timeout` is a client-side setting, not a SerpApi query parameter, so it
    // is pulled out of the per-call options as well (falling back to the
    // client default) instead of leaking into the query string.
    const { timeout = clientTimeout, ...requestParams }: serpapi.GoogleParameters =
      typeof queryOrOpts === 'string' ? { q: queryOrOpts } : queryOrOpts

    return this.ky
      .get('search', {
        searchParams: {
          ...clientParams,
          engine: 'google',
          api_key: this.apiKey,
          ...(requestParams as any)
        },
        timeout
      })
      .json<serpapi.SearchResult>()
  }
}

Wyświetl plik

@ -0,0 +1,297 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, omit } from '../utils.js'
/** Schemas and request/response types for the Serper Google search API. */
export namespace serper {
/** Default base URL used for Serper requests. */
export const API_BASE_URL = 'https://google.serper.dev'
/**
* Request parameters common to every search type.
*
* NOTE(review): each default is followed by `.optional()`. Under zod v3 an
* outer `.optional()` presumably passes `undefined` through without applying
* the inner default, so these defaults look like schema metadata rather than
* parse-time values — confirm against the zod version in use.
*/
export const SearchParamsSchema = z.object({
q: z.string().describe('search query'),
autocorrect: z.boolean().default(true).optional(),
gl: z.string().default('us').optional(),
hl: z.string().default('en').optional(),
page: z.number().int().positive().default(1).optional(),
num: z
.number()
.int()
.positive()
.default(10)
.optional()
.describe('number of results to return')
})
export type SearchParams = z.infer<typeof SearchParamsSchema>
/** `SearchParamsSchema` plus a `type` field selecting the kind of search. */
export const GeneralSearchSchema = SearchParamsSchema.extend({
type: z
.enum(['search', 'images', 'videos', 'places', 'news', 'shopping'])
.default('search')
.optional()
.describe('Type of Google search to perform')
})
export type GeneralSearchParams = z.infer<typeof GeneralSearchSchema>
// One response interface per search type; each narrows
// `searchParameters.type` to its own literal.
export interface SearchResponse {
searchParameters: SearchParameters & { type: 'search' }
organic: Organic[]
answerBox?: AnswerBox
knowledgeGraph?: KnowledgeGraph
topStories?: TopStory[]
peopleAlsoAsk?: PeopleAlsoAsk[]
relatedSearches?: RelatedSearch[]
}
export interface SearchImagesResponse {
searchParameters: SearchParameters & { type: 'images' }
images: Image[]
}
export interface SearchVideosResponse {
searchParameters: SearchParameters & { type: 'videos' }
videos: Video[]
}
export interface SearchPlacesResponse {
searchParameters: SearchParameters & { type: 'places' }
places: Place[]
}
export interface SearchNewsResponse {
searchParameters: SearchParameters & { type: 'news' }
news: News[]
}
export interface SearchShoppingResponse {
searchParameters: SearchParameters & { type: 'shopping' }
shopping: Shopping[]
}
/** Union of all response shapes. */
export type Response =
| SearchResponse
| SearchImagesResponse
| SearchVideosResponse
| SearchPlacesResponse
| SearchNewsResponse
| SearchShoppingResponse
export interface KnowledgeGraph {
title: string
type: string
website: string
imageUrl: string
description: string
descriptionSource: string
descriptionLink: string
attributes: Record<string, string>
}
export interface Organic {
title: string
link: string
snippet: string
position: number
imageUrl?: string
sitelinks?: SiteLink[]
}
export interface AnswerBox {
snippet: string
snippetHighlighted?: string[]
title: string
link: string
date?: string
position?: number
}
export interface SiteLink {
title: string
link: string
}
export interface PeopleAlsoAsk {
question: string
snippet: string
title: string
link: string
}
export interface RelatedSearch {
query: string
}
/** The `searchParameters` object carried by every response type. */
export interface SearchParameters {
q: string
gl: string
hl: string
num: number
autocorrect: boolean
page: number
type: string
engine: string
}
export interface TopStory {
title: string
link: string
source: string
date: string
imageUrl: string
}
export interface Image {
title: string
imageUrl: string
imageWidth: number
imageHeight: number
thumbnailUrl: string
thumbnailWidth: number
thumbnailHeight: number
source: string
domain: string
link: string
googleUrl: string
position: number
}
export interface Video {
title: string
link: string
snippet: string
date: string
imageUrl: string
position: number
}
export interface Place {
position: number
title: string
address: string
latitude: number
longitude: number
category: string
phoneNumber?: string
website: string
cid: string
rating?: number
ratingCount?: number
}
export interface News {
title: string
link: string
snippet: string
date: string
source: string
imageUrl: string
position: number
}
export interface Shopping {
title: string
source: string
link: string
price: string
imageUrl: string
delivery?: Record<string, string>
rating?: number
ratingCount?: number
offers?: string
productId?: string
position: number
}
/**
* Search parameters that can be preset on the client; every field is optional
* and the per-request query `q` is excluded.
*/
export type ClientParams = Partial<Omit<SearchParams, 'q'>>
}
/**
 * Lightweight wrapper around Serper for Google search.
 *
 * @see https://serper.dev
 */
export class SerperClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string
  protected readonly params: serper.ClientParams

  /**
   * @param opts.apiKey - Serper API key (defaults to the `SERPER_API_KEY` env
   * var); construction fails if it is missing.
   * @param opts.apiBaseUrl - Base URL of the API (defaults to
   * `serper.API_BASE_URL`).
   * @param opts.ky - `ky` instance to extend.
   * @param opts.params - Every remaining option is kept as a default search
   * parameter and merged into each request body.
   */
  constructor({
    apiKey = getEnv('SERPER_API_KEY'),
    apiBaseUrl = serper.API_BASE_URL,
    ky = defaultKy,
    ...params
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } & serper.ClientParams = {}) {
    assert(
      apiKey,
      'SerperClient missing required "apiKey" (defaults to "SERPER_API_KEY")'
    )
    super()

    this.params = params
    this.apiBaseUrl = apiBaseUrl
    this.apiKey = apiKey

    // The key is sent as a header on every request made through this instance.
    const headers = { 'x-api-key': this.apiKey }
    this.ky = ky.extend({ prefixUrl: this.apiBaseUrl, headers })
  }

  /**
   * Runs a search of the requested `type` (a plain `search` when given a
   * string or when `type` is unset).
   *
   * Note: the result is typed as `serper.SearchResponse` regardless of `type`.
   */
  @aiFunction({
    name: 'serper_google_search',
    description:
      'Uses Google Search to return the most relevant web pages for a given query. Useful for finding up-to-date news and information about any topic.',
    inputSchema: serper.GeneralSearchSchema.pick({
      q: true,
      num: true,
      type: true
    })
  })
  async search(queryOrOpts: string | serper.GeneralSearchParams) {
    if (typeof queryOrOpts === 'string') {
      return this._fetch<serper.SearchResponse>('search', queryOrOpts)
    }

    // `type` picks the endpoint and is not part of the request body.
    const endpoint = queryOrOpts.type || 'search'
    return this._fetch<serper.SearchResponse>(
      endpoint,
      omit(queryOrOpts, 'type')
    )
  }

  /** Image search (`images` endpoint). */
  async searchImages(queryOrOpts: string | serper.SearchParams) {
    return this._fetch<serper.SearchImagesResponse>('images', queryOrOpts)
  }

  /** Video search (`videos` endpoint). */
  async searchVideos(queryOrOpts: string | serper.SearchParams) {
    return this._fetch<serper.SearchVideosResponse>('videos', queryOrOpts)
  }

  /** Places search (`places` endpoint). */
  async searchPlaces(queryOrOpts: string | serper.SearchParams) {
    return this._fetch<serper.SearchPlacesResponse>('places', queryOrOpts)
  }

  /** News search (`news` endpoint). */
  async searchNews(queryOrOpts: string | serper.SearchParams) {
    return this._fetch<serper.SearchNewsResponse>('news', queryOrOpts)
  }

  /** Product search (`shopping` endpoint). */
  async searchProducts(queryOrOpts: string | serper.SearchParams) {
    return this._fetch<serper.SearchShoppingResponse>('shopping', queryOrOpts)
  }

  /**
   * POSTs the client defaults merged with the per-call parameters (per-call
   * values win) to `endpoint` and parses the JSON response.
   */
  protected async _fetch<T extends serper.Response>(
    endpoint: string,
    queryOrOpts: string | serper.SearchParams
  ): Promise<T> {
    const overrides =
      typeof queryOrOpts === 'string' ? { q: queryOrOpts } : queryOrOpts
    const body = { ...this.params, ...overrides }

    return this.ky.post(endpoint, { json: body }).json<T>()
  }
}

Wyświetl plik

@ -0,0 +1,459 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { TimeoutError } from '../errors.js'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, delay, getEnv } from '../utils.js'
// TODO: need to expose more aiFunctions
/** Constants and request/response types for the Slack Web API. */
export namespace slack {
  /** Default base URL used for Slack API requests. */
  export const API_BASE_URL = 'https://slack.com/api'

  /** Default time to wait for a reply, in milliseconds. */
  export const DEFAULT_TIMEOUT_MS = 120_000

  /** Default polling interval while waiting for a reply, in milliseconds. */
  export const DEFAULT_INTERVAL_MS = 5000

  export interface BotProfile {
    id: string
    app_id: string
    name: string
    icons: Record<string, unknown>
    deleted: boolean
    updated: number
    team_id: string
  }

  /** Response shape used for both conversation history and thread replies. */
  export interface Replies {
    messages: Message[]
    has_more: boolean
    ok: boolean
    response_metadata: ResponseMetadata
  }

  export interface Message {
    bot_id?: string
    client_msg_id?: string
    type: string
    text: string
    user: string
    ts: string
    app_id?: string
    blocks?: Record<string, unknown>[]
    reply_count?: number
    subscribed?: boolean
    last_read?: string
    unread_count?: number
    team?: string
    thread_ts: string
    parent_user_id?: string
    bot_profile?: BotProfile
  }

  export interface ResponseMetadata {
    next_cursor: string
  }

  export type Attachment = {
    [key: string]: any
  }

  export type Block = {
    [key: string]: any
  }

  /**
   * Parameters for the Slack API's `chat.postMessage` method.
   *
   * @see {@link https://api.slack.com/methods/chat.postMessage}
   */
  export type PostMessageParams = {
    /**
     * The formatted text of the message to be published.
     */
    text: string
    /**
     * Channel, private group, or IM channel to send the message to. Can be an encoded ID, or a name.
     */
    channel?: string
    /**
     * Provide another message's ts value to make this message a reply. Avoid using a reply's ts value; use its parent instead.
     */
    thread_ts?: string
    /**
     * A JSON-based array of structured attachments, presented as a URL-encoded string.
     */
    attachments?: Attachment[]
    /**
     * A JSON-based array of structured blocks, presented as a URL-encoded string.
     */
    blocks?: Block[]
    /**
     * Emoji to use as the icon for this message. Overrides icon_url.
     */
    icon_emoji?: string
    /**
     * URL to an image to use as the icon for this message.
     */
    icon_url?: string
    /**
     * If set to true, user group handles (to name just one example) will be linked in the message text.
     */
    link_names?: boolean
    /**
     * Change how messages are treated (default: 'none').
     */
    parse?: 'full' | 'none'
    /**
     * Used in conjunction with thread_ts and indicates whether reply should be made visible to everyone in the channel or conversation.
     */
    reply_broadcast?: boolean
    /**
     * Pass true to enable unfurling of primarily text-based content.
     */
    unfurl_links?: boolean
    /**
     * Pass false to disable unfurling of media content.
     */
    unfurl_media?: boolean
    /**
     * Set your bot's user name.
     */
    username?: string
  }

  /**
   * Parameters for the Slack API's `conversations.history` method.
   *
   * @see {@link https://api.slack.com/methods/conversations.history}
   */
  export type ConversationHistoryParams = {
    /**
     * The conversation ID to fetch history for.
     */
    channel: string
    /**
     * Only messages after this Unix timestamp will be included in results (default: `0`).
     */
    oldest?: string
    /**
     * The cursor value used for pagination of results (default: first page).
     */
    cursor?: string
    /**
     * Only messages before this Unix timestamp will be included in results (default: now).
     */
    latest?: string
    /**
     * The maximum number of items to return (default: `100`).
     */
    limit?: number
    /**
     * Include messages with the oldest or latest timestamps in results. Ignored unless either timestamp is specified (default: `false`).
     */
    inclusive?: boolean
    /**
     * Return all metadata associated with the messages (default: `false`).
     */
    include_all_metadata?: boolean
  }

  /**
   * Parameters for the Slack API's `conversations.replies` method.
   *
   * @see {@link https://api.slack.com/methods/conversations.replies}
   */
  export type ConversationRepliesParams = {
    /**
     * The conversation ID to fetch the thread from.
     */
    channel: string
    /**
     * Unique identifier of either a thread's parent message or a message in the thread.
     *
     * ### Notes
     *
     * - ts must be the timestamp of an existing message with 0 or more replies.
     * - If there are no replies then just the single message referenced by ts will return - it is just an ordinary, unthreaded message.
     */
    ts: string
    /**
     * The cursor value used for pagination of results.
     * Set this to the `next_cursor` attribute returned by a previous request's response_metadata.
     */
    cursor?: string
    /**
     * Only messages before this Unix timestamp will be included in results.
     */
    latest?: string
    /**
     * Only messages after this Unix timestamp will be included in results.
     */
    oldest?: string
    /**
     * @deprecated Misspelling of `oldest`, kept only so existing callers keep
     * type-checking. Use `oldest` instead.
     */
    oddest?: string
    /**
     * The maximum number of items to return.
     * Fewer than the requested number of items may be returned, even if the end of the users list hasn't been reached.
     */
    limit?: number
    /**
     * Include messages with the oldest or latest timestamps in results. Ignored unless either timestamp is specified.
     */
    inclusive?: boolean
    /**
     * Return all metadata associated with this message.
     */
    include_thread_metadata?: boolean
  }

  /** Options for sending a message and waiting for a reply to it. */
  export type SendAndWaitOptions = {
    /**
     * The text of the message to send.
     */
    text: string
    /**
     * The ID of the channel to send the message to.
     */
    channel?: string
    /**
     * The timeout in milliseconds to wait for a reply before throwing an error.
     */
    timeoutMs?: number
    /**
     * The interval in milliseconds to poll for replies.
     */
    intervalMs?: number
    /**
     * A function to validate the reply message. If the function returns `true`, the reply is considered valid and the function will return the message. If the function returns `false`, the reply is considered invalid and the function will continue to wait for a reply until the timeout is reached.
     */
    validate?: (message: Message) => boolean
    /**
     * A stop signal from an [`AbortController`](https://developer.mozilla.org/en-US/docs/Web/API/AbortController), which can be used to abort retrying. More specifically, when `AbortController.abort(reason)` is called, the function will throw an error with the `reason` argument as the error message.
     */
    stopSignal?: AbortSignal
  }
}
/**
* Minimal Slack API client for sending and receiving Slack messages.
*
* @see https://api.slack.com/docs
*/
export class SlackClient extends AIFunctionsProvider {
protected readonly ky: KyInstance
protected readonly defaultChannel?: string
/**
 * @param opts.apiKey - Slack token sent as a `Bearer` authorization header
 * (defaults to the `SLACK_API_KEY` env var); construction fails if missing.
 * @param opts.apiBaseUrl - Base URL of the API (defaults to
 * `slack.API_BASE_URL`).
 * @param opts.defaultChannel - Channel used when a call does not name one
 * (defaults to the `SLACK_DEFAULT_CHANNEL` env var).
 * @param opts.ky - `ky` instance to extend.
 */
constructor({
  apiKey = getEnv('SLACK_API_KEY'),
  apiBaseUrl = slack.API_BASE_URL,
  defaultChannel = getEnv('SLACK_DEFAULT_CHANNEL'),
  ky = defaultKy
}: {
  apiKey?: string
  apiBaseUrl?: string
  defaultChannel?: string
  ky?: KyInstance
} = {}) {
  assert(
    apiKey,
    'SlackClient missing required "apiKey" (defaults to "SLACK_API_KEY")'
  )
  super()

  const headers = { Authorization: `Bearer ${apiKey}` }
  this.ky = ky.extend({ prefixUrl: apiBaseUrl, headers })
  this.defaultChannel = defaultChannel
}
/**
 * Sends a message to a channel.
 *
 * The target is `options.channel` when set, otherwise the client's default
 * channel.
 *
 * @throws Error if neither `options.channel` nor a default channel is set.
 */
@aiFunction({
  name: 'slack_send_message',
  description: 'Sends a slack message to a slack channel',
  inputSchema: z.object({
    text: z
      .string()
      .describe('Formatted text of the message to be published.'),
    channel: z
      .string()
      .describe(
        'Channel, private group, or IM channel to send the message to. Can be an encoded ID, or a name.'
      )
  })
})
public async sendMessage(options: slack.PostMessageParams) {
  // Resolve the channel once. `||` rather than `??` so an empty string also
  // falls back to the default, matching the truthiness check below.
  const channel = options.channel || this.defaultChannel
  if (!channel) {
    throw new Error('Error no channel specified')
  }

  return this.ky
    .post('chat.postMessage', {
      json: {
        ...options,
        // Applied last so an explicit `channel: undefined` in `options` cannot
        // wipe out the default.
        channel
      }
    })
    .json<slack.Message>()
}
/**
 * Calls `conversations.history` with the given options as query parameters
 * and returns the parsed JSON response.
 */
public async fetchConversationHistory(
  options: slack.ConversationHistoryParams
) {
  const request = this.ky.get('conversations.history', {
    searchParams: options
  })
  return request.json<slack.Replies>()
}
/**
 * Calls `conversations.replies` with the given options as query parameters
 * and returns the parsed JSON response.
 */
protected async fetchReplies(options: slack.ConversationRepliesParams) {
  const request = this.ky.get('conversations.replies', {
    searchParams: options
  })
  return request.json<slack.Replies>()
}
/**
 * Returns a list of messages that were sent in a channel after a given
 * timestamp both directly and in threads.
 *
 * Bot messages are dropped and the result is ordered newest first.
 *
 * @param channel - Conversation to inspect.
 * @param ts - Timestamp of the reference message; only later messages are kept.
 */
private async fetchCandidates(channel: string, ts: string) {
  // The two lookups are independent, so issue them concurrently.
  const [history, directReplies] = await Promise.all([
    this.fetchConversationHistory({ channel }),
    this.fetchReplies({ channel, ts })
  ])

  // Thread replies first, then channel history; unsuccessful responses
  // contribute nothing.
  const candidates: slack.Message[] = [
    ...(directReplies.ok ? directReplies.messages : []),
    ...(history.ok ? history.messages : [])
  ].filter(
    // Keep only non-bot messages newer than the reference message.
    // NOTE(review): `ts` values are compared as strings — assumes Slack
    // timestamps share a fixed-width format; confirm.
    (message) => message.ts > ts && !message.bot_id
  )

  // Sort by timestamp so that the most recent messages come first:
  candidates.sort(
    (a, b) => Number.parseFloat(b.ts) - Number.parseFloat(a.ts)
  )

  return candidates
}
/**
* Sends a message to a channel and waits for a reply to the message, which
* is returned if it passes validation.
*/
public async sendMessageAndWaitForReply({
text,
channel = this.defaultChannel,
timeoutMs = slack.DEFAULT_TIMEOUT_MS,
intervalMs = slack.DEFAULT_INTERVAL_MS,
validate = () => true,
stopSignal
}: slack.SendAndWaitOptions) {
if (!channel) {
throw new Error('SlackClient missing required "channel"')
}
let aborted = false
stopSignal?.addEventListener(
'abort',
() => {
aborted = true
},
{ once: true }
)
const res = await this.sendMessage({ text, channel })
if (!res.ts) {
throw new Error('Missing ts in response')
}
const start = Date.now()
let nUserMessages = 0
do {
if (aborted) {
const reason = stopSignal?.reason || 'Aborted waiting for reply'
if (reason instanceof Error) {
throw reason
} else {
throw new TypeError(reason)
}
}
const candidates = await this.fetchCandidates(channel, res.ts)
if (candidates.length > 0) {
const candidate = candidates[0]!
if (validate(candidate)) {
return candidate
}
if (nUserMessages !== candidates.length) {
await this.sendMessage({
text: `Invalid response: ${candidate.text}. Please try again following the instructions.`,
channel,
thread_ts: candidate.ts
})
}
nUserMessages = candidates.length
}
await delay(intervalMs)
} while (Date.now() - start < timeoutMs)
throw new TimeoutError('SlackClient timed out waiting for reply')
}
}

Wyświetl plik

@ -0,0 +1,186 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, pruneNullOrUndefined, throttleKy } from '../utils.js'
/**
 * Constants and request/response types for the Tavily search API.
 */
export namespace tavily {
  export const API_BASE_URL = 'https://api.tavily.com'

  // Default throttle: at most 20 requests per minute.
  export const throttle = pThrottle({
    limit: 20,
    interval: 60_000
  })

  /** Request options accepted by the search endpoint. */
  export interface SearchOptions {
    /** Search query. (required) */
    query: string

    /**
     * The depth of the search: `basic` for quick results, `advanced` for
     * in-depth, higher-quality results at the cost of a longer response time.
     * Default is `basic`. An advanced call counts as 2 requests.
     */
    search_depth?: 'basic' | 'advanced'

    /** Include a synthesized answer in the search results. Default is `false`. */
    include_answer?: boolean

    /** Include a list of query related images in the response. Default is `false`. */
    include_images?: boolean

    /** Include raw content in the search results. Default is `false`. */
    include_raw_content?: boolean

    /** The number of maximum search results to return. Default is `5`. */
    max_results?: number

    /** A list of domains to specifically include in the search results. Default is `undefined`, which includes all domains. */
    include_domains?: string[]

    /** A list of domains to specifically exclude from the search results. Default is `undefined`, which doesn't exclude any domains. */
    exclude_domains?: string[]
  }

  /** A single entry of `SearchResponse.results`. */
  export interface SearchResult {
    /** The url of the search result. */
    url: string

    /** The title of the search result page. */
    title: string

    /**
     * The most query related content from the scraped url. We use proprietary AI and algorithms to extract only the most relevant content from each url, to optimize for context quality and size.
     */
    content: string

    /** The parsed and cleaned HTML of the site. For now includes parsed text only. */
    raw_content?: string

    /** The relevance score of the search result. */
    score: string
  }

  /** Response body returned by the search endpoint. */
  export interface SearchResponse {
    /** The search query. */
    query: string

    /** A list of sorted search results ranked by relevancy. */
    results: SearchResult[]

    /** The answer to your search query. */
    answer?: string

    /** A list of query related image urls. */
    images?: string[]

    /** A list of suggested research follow up questions related to original query. */
    follow_up_questions?: string[]

    /** How long it took to generate a response. */
    response_time: string
  }
}
/**
 * Client for Tavily, a web search API tailored for LLMs.
 *
 * @see https://tavily.com
 */
export class TavilyClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param opts.apiKey - Tavily API key (defaults to the `TAVILY_API_KEY` env var).
   * @param opts.apiBaseUrl - Base URL of the Tavily API.
   * @param opts.throttle - Whether to wrap requests with the default
   * `tavily.throttle` rate limiter.
   * @param opts.ky - `ky` instance to build the client on.
   */
  constructor({
    apiKey = getEnv('TAVILY_API_KEY'),
    apiBaseUrl = tavily.API_BASE_URL,
    throttle = true,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'TavilyClient missing required "apiKey" (defaults to "TAVILY_API_KEY")'
    )
    super()

    this.apiKey = apiKey
    this.apiBaseUrl = apiBaseUrl

    // Optionally rate-limit the underlying client before binding the base URL.
    let baseKy = ky
    if (throttle) {
      baseKy = throttleKy(ky, tavily.throttle)
    }

    this.ky = baseKy.extend({ prefixUrl: apiBaseUrl })
  }

  /**
   * Searches the web for pages relevant to the given query and summarizes the results.
   *
   * @param queryOrOpts - Either a plain query string or a full options object.
   * @returns The search response with null/undefined values pruned, both at
   * the top level and on each individual result.
   */
  @aiFunction({
    name: 'tavily_web_search',
    description:
      'Searches the web to find the most relevant pages for a given query and summarizes the results. Very useful for finding up-to-date news and information about any topic.',
    inputSchema: z.object({
      query: z
        .string()
        .describe('The query to search for. Accepts any Google search query.'),
      search_depth: z
        .enum(['basic', 'advanced'])
        .optional()
        .describe(
          'How deep of a search to perform. Use "basic" for quick results and "advanced" for slower, in-depth results.'
        ),
      include_answer: z
        .boolean()
        .optional()
        .describe(
          'Whether or not to include an answer summary in the results.'
        ),
      include_images: z
        .boolean()
        .optional()
        .describe('Whether or not to include images in the results.'),
      max_results: z
        .number()
        .int()
        .positive()
        .default(5)
        .optional()
        .describe('Max number of search results to return.')
      // include_domains: z
      //   .array(z.string())
      //   .optional()
      //   .describe(
      //     'List of domains to specifically include in the search results.'
      //   ),
      // exclude_domains: z
      //   .array(z.string())
      //   .optional()
      //   .describe(
      //     'List of domains to specifically exclude from the search results.'
      //   )
    })
  })
  async search(queryOrOpts: string | tavily.SearchOptions) {
    // Normalize the shorthand string form into an options object.
    const options =
      typeof queryOrOpts === 'string' ? { query: queryOrOpts } : queryOrOpts

    // The API key travels in the JSON body alongside the search options.
    const payload = {
      ...options,
      api_key: this.apiKey
    }

    const res = await this.ky
      .post('search', { json: payload })
      .json<tavily.SearchResponse>()

    const results = res.results?.map(pruneNullOrUndefined)

    return pruneNullOrUndefined({
      ...res,
      results
    })
  }
}

Wyświetl plik

@ -0,0 +1,567 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { TimeoutError } from '../errors.js'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, delay, getEnv } from '../utils.js'
/**
 * Constants, resource types and text-chunking helpers for the Twilio
 * Conversations client.
 */
export namespace twilio {
  export const CONVERSATION_API_BASE_URL = 'https://conversations.twilio.com/v1'

  export const DEFAULT_TIMEOUT_MS = 1_800_000
  export const DEFAULT_INTERVAL_MS = 5000
  export const DEFAULT_BOT_NAME = 'agentic'

  /**
   * Twilio recommends keeping SMS messages to a length of 320 characters or less, so we'll use that as the maximum.
   *
   * @see {@link https://support.twilio.com/hc/en-us/articles/360033806753-Maximum-Message-Length-with-Twilio-Programmable-Messaging}
   */
  export const SMS_LENGTH_SOFT_LIMIT = 320
  export const SMS_LENGTH_HARD_LIMIT = 1600

  export interface Conversation {
    unique_name?: string
    date_updated: Date
    friendly_name: string
    timers: null
    account_sid: string
    url: string
    state: string
    date_created: Date
    messaging_service_sid: string
    sid: string
    attributes: string
    bindings: null
    chat_service_sid: string
    links: ConversationLinks
  }

  export interface ConversationLinks {
    participants: string
    messages: string
    webhooks: string
  }

  export interface ConversationMessage {
    body: string
    index: number
    author: string
    date_updated: Date
    media: null
    participant_sid: string | null
    conversation_sid: string
    account_sid: string
    delivery: null
    url: string
    date_created: Date
    content_sid: string | null
    sid: string
    attributes: string
    links: {
      delivery_receipts: string
    }
  }

  export interface ConversationParticipant {
    last_read_message_index: null
    date_updated: Date
    last_read_timestamp: null
    conversation_sid: string
    account_sid: string
    url: string
    date_created: Date
    role_sid: string
    sid: string
    attributes: string
    identity?: string
    messaging_binding: ConversationMessagingBinding
  }

  export interface ConversationMessagingBinding {
    proxy_address: string
    type: string
    address: string
  }

  export interface ConversationMessages {
    messages: ConversationMessage[]
    meta: {
      page: number
      page_size: number
      first_page_url: string
      previous_page_url: string | null
      url: string
      next_page_url: string | null
      key: string
    }
  }

  /**
   * Participant Conversation Resource.
   *
   * This interface represents a participant in a conversation, along with the conversation details.
   */
  export interface ParticipantConversation {
    /** The unique ID of the Account responsible for this conversation. */
    account_sid: string

    /** The unique ID of the Conversation Service this conversation belongs to. */
    chat_service_sid: string

    /** The unique ID of the Participant. */
    participant_sid: string

    /** The unique string that identifies the conversation participant as Conversation User. */
    participant_user_sid: string

    /**
     * A unique string identifier for the conversation participant as Conversation User.
     * This parameter is non-null if (and only if) the participant is using the Conversations SDK to communicate.
     */
    participant_identity: string

    /**
     * Information about how this participant exchanges messages with the conversation.
     * A JSON parameter consisting of type and address fields of the participant.
     */
    participant_messaging_binding: object

    /** The unique ID of the Conversation this Participant belongs to. */
    conversation_sid: string

    /** An application-defined string that uniquely identifies the Conversation resource. */
    conversation_unique_name: string

    /** The human-readable name of this conversation, limited to 256 characters. */
    conversation_friendly_name: string

    /**
     * An optional string metadata field you can use to store any data you wish.
     * The string value must contain structurally valid JSON if specified.
     */
    conversation_attributes: string

    /** The date that this conversation was created, given in ISO 8601 format. */
    conversation_date_created: string

    /** The date that this conversation was last updated, given in ISO 8601 format. */
    conversation_date_updated: string

    /** Identity of the creator of this Conversation. */
    conversation_created_by: string

    /** The current state of this User Conversation. One of inactive, active or closed. */
    conversation_state: 'inactive' | 'active' | 'closed'

    /** Timer date values representing state update for this conversation. */
    conversation_timers: object

    /** Contains absolute URLs to access the participant and conversation of this conversation. */
    links: { participant: string; conversation: string }
  }

  export type SendAndWaitOptions = {
    /**
     * The recipient's phone number in E.164 format (e.g. +14565551234).
     */
    recipientPhoneNumber?: string

    /**
     * The text of the message to send (or an array of strings to send as separate messages).
     */
    text: string | string[]

    /**
     * Friendly name of the conversation.
     */
    name: string

    /**
     * The timeout in milliseconds to wait for a reply before throwing an error.
     */
    timeoutMs?: number

    /**
     * The interval in milliseconds to poll for replies.
     */
    intervalMs?: number

    /**
     * A function to validate the reply message. If the function returns `true`, the reply is considered valid and the function will return the message. If the function returns `false`, the reply is considered invalid and the function will continue to wait for a reply until the timeout is reached.
     */
    validate?: (message: ConversationMessage) => boolean

    /**
     * A stop signal from an [`AbortController`](https://developer.mozilla.org/en-US/docs/Web/API/AbortController), which can be used to abort retrying. More specifically, when `AbortController.abort(reason)` is called, the function will throw an error with the `reason` argument as the error message.
     */
    stopSignal?: AbortSignal
  }

  /**
   * Chunks a string into an array of chunks, splitting on single spaces.
   *
   * Words are packed greedily into chunks of at most `maxLength` characters.
   * A single word longer than `maxLength` becomes its own chunk, truncated
   * and suffixed with `...`. Chunks are emitted in the order of the input
   * text and empty chunks are never produced.
   *
   * @param text - string to chunk
   * @param maxLength - maximum length of each chunk
   *
   * @returns array of chunks
   */
  export function chunkString(text: string, maxLength: number): string[] {
    const words = text.split(' ')
    const chunks: string[] = []
    let chunk = ''

    for (const word of words) {
      if (word.length > maxLength) {
        // Flush the pending chunk first so the output keeps the input order.
        if (chunk) {
          chunks.push(chunk.trim())
          chunk = ''
        }

        // Truncate the word if it's too long and indicate that it was truncated:
        chunks.push(word.slice(0, Math.max(0, maxLength - 3)) + '...')
      } else if (chunk && (chunk + ' ' + word).length > maxLength) {
        // Only overflow when there is a pending chunk; otherwise a first word
        // of exactly `maxLength` characters would emit an empty chunk.
        chunks.push(chunk.trim())
        chunk = word
      } else {
        chunk += (chunk ? ' ' : '') + word
      }
    }

    if (chunk) {
      chunks.push(chunk.trim())
    }

    return chunks
  }

  /**
   * Chunks an array of strings into an array of chunks while preserving
   * existing sections.
   *
   * Each section is chunked independently, so text from two different
   * sections never ends up in the same chunk.
   *
   * @param textSections - array of strings to chunk
   * @param maxLength - maximum length of each chunk
   *
   * @returns array of chunks
   */
  export function chunkMultipleStrings(
    textSections: string[],
    maxLength: number
  ): string[] {
    return textSections.flatMap((section) => chunkString(section, maxLength))
  }
}
/**
 * A client for interacting with the Twilio Conversations API to send automated
 * messages and wait for replies.
 *
 * Requests use HTTP Basic auth built from the account SID and auth token and
 * are sent form-urlencoded.
 *
 * @see {@link https://www.twilio.com/docs/conversations/api}
 */
export class TwilioClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly phoneNumber: string
  protected readonly botName: string
  protected readonly defaultRecipientPhoneNumber?: string

  /**
   * @param opts.accountSid - Twilio account SID (defaults to `TWILIO_ACCOUNT_SID`).
   * @param opts.authToken - Twilio auth token (defaults to `TWILIO_AUTH_TOKEN`).
   * @param opts.phoneNumber - Phone number used as the proxy address when
   * adding participants (defaults to `TWILIO_PHONE_NUMBER`).
   * @param opts.defaultRecipientPhoneNumber - Recipient used when a call does
   * not specify one (defaults to `TWILIO_DEFAULT_RECIPIENT_PHONE_NUMBER`).
   * @param opts.apiBaseUrl - Base URL of the Conversations API.
   * @param opts.botName - Author name attached to messages sent by this client.
   * @param opts.ky - `ky` instance to extend.
   */
  constructor({
    accountSid = getEnv('TWILIO_ACCOUNT_SID'),
    authToken = getEnv('TWILIO_AUTH_TOKEN'),
    phoneNumber = getEnv('TWILIO_PHONE_NUMBER'),
    defaultRecipientPhoneNumber = getEnv(
      'TWILIO_DEFAULT_RECIPIENT_PHONE_NUMBER'
    ),
    apiBaseUrl = twilio.CONVERSATION_API_BASE_URL,
    botName = twilio.DEFAULT_BOT_NAME,
    ky = defaultKy
  }: {
    accountSid?: string
    authToken?: string
    phoneNumber?: string
    defaultRecipientPhoneNumber?: string
    apiBaseUrl?: string
    botName?: string
    ky?: KyInstance
  } = {}) {
    assert(
      accountSid,
      'TwilioClient missing required "accountSid" (defaults to "TWILIO_ACCOUNT_SID")'
    )
    assert(
      authToken,
      'TwilioClient missing required "authToken" (defaults to "TWILIO_AUTH_TOKEN")'
    )
    assert(
      phoneNumber,
      'TwilioClient missing required "phoneNumber" (defaults to "TWILIO_PHONE_NUMBER")'
    )
    super()

    if (defaultRecipientPhoneNumber) {
      this.defaultRecipientPhoneNumber = defaultRecipientPhoneNumber
    }

    this.botName = botName
    this.phoneNumber = phoneNumber

    this.ky = ky.extend({
      prefixUrl: apiBaseUrl,
      headers: {
        Authorization:
          'Basic ' +
          Buffer.from(`${accountSid}:${authToken}`).toString('base64'),
        'Content-Type': 'application/x-www-form-urlencoded'
      }
    })
  }

  /**
   * Deletes a conversation and all its messages.
   */
  async deleteConversation(conversationSid: string) {
    return this.ky.delete(`Conversations/${conversationSid}`)
  }

  /**
   * Removes a participant from a conversation.
   */
  async removeParticipant({
    conversationSid,
    participantSid
  }: {
    conversationSid: string
    participantSid: string
  }) {
    return this.ky.delete(
      `Conversations/${conversationSid}/Participants/${participantSid}`
    )
  }

  /**
   * Fetches all conversations a participant as identified by their phone number is a part of.
   */
  async findParticipantConversations(participantPhoneNumber: string) {
    const encodedPhoneNumber = encodeURIComponent(participantPhoneNumber)
    return this.ky
      .get(`ParticipantConversations?Address=${encodedPhoneNumber}`)
      .json<{ conversations: twilio.ParticipantConversation[] }>()
  }

  /**
   * Creates a new conversation.
   */
  async createConversation(friendlyName: string) {
    const params = new URLSearchParams()
    params.set('FriendlyName', friendlyName)
    return this.ky
      .post('Conversations', {
        body: params
      })
      .json<twilio.Conversation>()
  }

  /**
   * Adds a participant to a conversation.
   *
   * The participant is bound by SMS address, with this client's phone number
   * as the proxy address.
   */
  async addParticipant({
    conversationSid,
    recipientPhoneNumber
  }: {
    conversationSid: string
    recipientPhoneNumber: string
  }) {
    const params = new URLSearchParams()
    params.set('MessagingBinding.Address', recipientPhoneNumber)
    params.set('MessagingBinding.ProxyAddress', this.phoneNumber)
    return this.ky
      .post(`Conversations/${conversationSid}/Participants`, {
        body: params
      })
      .json<twilio.ConversationParticipant>()
  }

  /**
   * Chunks a long text message into smaller parts and sends them as separate messages.
   *
   * Chunks are sent sequentially so they arrive in order.
   *
   * @param opts.maxChunkLength - Maximum length of each chunk; defaults to
   * `twilio.SMS_LENGTH_SOFT_LIMIT`.
   * @returns The sent messages, in sending order.
   */
  async sendTextWithChunking({
    conversationSid,
    text,
    maxChunkLength = twilio.SMS_LENGTH_SOFT_LIMIT
  }: {
    conversationSid: string
    text: string | string[]
    maxChunkLength?: number
  }) {
    const chunks = Array.isArray(text)
      ? twilio.chunkMultipleStrings(text, maxChunkLength)
      : twilio.chunkString(text, maxChunkLength)

    const out: twilio.ConversationMessage[] = []
    for (const chunk of chunks) {
      const sent = await this.sendMessage({
        conversationSid,
        text: chunk
      })
      out.push(sent)
    }

    return out
  }

  /**
   * Posts a message to a conversation.
   *
   * Text longer than `twilio.SMS_LENGTH_HARD_LIMIT` is truncated and suffixed
   * with `...`. The message author is set to the client's bot name.
   */
  @aiFunction({
    name: 'twilio_send_message',
    description:
      'Sends an text SMS message via the Twilio Conversation API to a specific conversation.',
    inputSchema: z.object({
      text: z
        .string()
        .describe(
          'Text of the SMS content to sent. Must be brief as SMS has strict character limits.'
        ),
      conversationSid: z
        .string()
        .describe('ID of the Twilio Conversation to the send the emssage to.')
    })
  })
  async sendMessage({
    conversationSid,
    text
  }: {
    conversationSid: string
    text: string
  }) {
    // Truncate the text if it exceeds the hard limit and add an ellipsis:
    if (text.length > twilio.SMS_LENGTH_HARD_LIMIT) {
      text =
        text.slice(0, Math.max(0, twilio.SMS_LENGTH_HARD_LIMIT - 3)) + '...'
    }

    const params = new URLSearchParams()
    params.set('Body', text)
    params.set('Author', this.botName)
    return this.ky
      .post(`Conversations/${conversationSid}/Messages`, {
        body: params
      })
      .json<twilio.ConversationMessage>()
  }

  /**
   * Fetches all messages in a conversation.
   *
   * @param conversationSidOrOptions - Either the conversation SID itself or an
   * object carrying it as `conversationSid`.
   */
  @aiFunction({
    name: 'twilio_get_messages',
    description:
      'Retrieves all SMS messages contained within a specific Twilio Conversation.',
    inputSchema: z.object({
      conversationSid: z
        .string()
        .describe(
          'ID of the Twilio Conversation to the retrieve the messages for.'
        )
    })
  })
  async fetchMessages(
    conversationSidOrOptions: string | { conversationSid: string }
  ) {
    const conversationSid =
      typeof conversationSidOrOptions === 'string'
        ? conversationSidOrOptions
        : conversationSidOrOptions.conversationSid

    return this.ky
      .get(`Conversations/${conversationSid}/Messages`)
      .json<twilio.ConversationMessages>()
  }

  /**
   * Sends a SMS to a recipient and waits for a reply to the message, which is returned if it passes validation.
   *
   * A fresh conversation is created, the recipient is removed from any
   * conversations they are currently part of and added to the new one, and the
   * conversation is then polled every `intervalMs` milliseconds. The
   * participant is removed from the new conversation again however this
   * method exits (valid reply, abort, timeout or request failure).
   *
   * @throws Error if no recipient phone number is available.
   * @throws the `stopSignal` abort reason (wrapped in a `TypeError` when it is
   * not already an `Error`) once the signal is aborted.
   * @throws TimeoutError if no valid reply arrives within `timeoutMs`.
   */
  public async sendAndWaitForReply({
    text,
    name,
    recipientPhoneNumber = this.defaultRecipientPhoneNumber,
    timeoutMs = twilio.DEFAULT_TIMEOUT_MS,
    intervalMs = twilio.DEFAULT_INTERVAL_MS,
    validate = () => true,
    stopSignal
  }: twilio.SendAndWaitOptions) {
    if (!recipientPhoneNumber) {
      throw new Error(
        'TwilioClient error missing required "recipientPhoneNumber"'
      )
    }

    const { sid: conversationSid } = await this.createConversation(name)

    // Find and remove participant from conversation they are currently in, if any:
    const { conversations } =
      await this.findParticipantConversations(recipientPhoneNumber)

    for (const conversation of conversations) {
      await this.removeParticipant({
        conversationSid: conversation.conversation_sid,
        participantSid: conversation.participant_sid
      })
    }

    const { sid: participantSid } = await this.addParticipant({
      conversationSid,
      recipientPhoneNumber
    })

    try {
      await this.sendTextWithChunking({ conversationSid, text })

      const start = Date.now()
      let nUserMessages = 0

      do {
        // Read the signal state directly instead of tracking it via an
        // 'abort' listener: the event is not dispatched for a signal that was
        // already aborted before the listener was attached.
        if (stopSignal?.aborted) {
          const reason = stopSignal.reason || 'Aborted waiting for reply'

          if (reason instanceof Error) {
            throw reason
          } else {
            throw new TypeError(String(reason))
          }
        }

        const response = await this.fetchMessages(conversationSid)

        // Anything not authored by the bot counts as a user reply.
        const candidates = response.messages.filter(
          (message) => message.author !== this.botName
        )

        // The last candidate in the list is the one that gets validated.
        const candidate = candidates.at(-1)

        if (candidate) {
          if (validate(candidate)) {
            return candidate
          }

          // Only complain again when new user messages have arrived since
          // the last poll.
          if (nUserMessages !== candidates.length) {
            await this.sendMessage({
              text: `Invalid response: ${candidate.body}. Please try again with a valid response format.`,
              conversationSid
            })
          }

          nUserMessages = candidates.length
        }

        await delay(intervalMs)
      } while (Date.now() - start < timeoutMs)

      throw new TimeoutError('Twilio timeout waiting for reply')
    } finally {
      // Always release the participant, including when a request fails
      // mid-way, so they are not left bound to this conversation.
      await this.removeParticipant({ conversationSid, participantSid })
    }
  }
}

Wyświetl plik

@ -0,0 +1,78 @@
import { auth, Client as TwitterV2Client } from 'twitter-api-sdk'
import { getNango, validateNangoConnectionOAuthScopes } from '../../nango.js'
import { assert, getEnv } from '../../utils.js'
// Auth new Nango accounts here: https://app.nango.dev/connections
// The Twitter OAuth2User class requires a client id, which we don't have
// since we're using Nango for auth, so instead we just pass a dummy value
// and allow Nango to handle all auth/refresh/access token management.
const dummyTwitterClientId = 'agentic'
/**
 * OAuth scopes requested by default when creating a Twitter client; used as
 * the default `scopes` argument of `createTwitterV2Client`.
 */
export const defaultTwitterOAuthScopes = [
'tweet.read',
'users.read',
'offline.access',
'tweet.write'
]
/**
 * Builds a Twitter `OAuth2User` from the credentials stored on a Nango
 * connection.
 *
 * The connection is looked up by provider config key and connection id, its
 * OAuth scopes are validated against `scopes`, and its raw credentials are
 * handed to the Twitter SDK as the token.
 */
async function createTwitterAuth(params: {
  scopes: string[]
  nangoConnectionId: string
  nangoCallbackUrl: string
  nangoProviderConfigKey: string
}): Promise<auth.OAuth2User> {
  const {
    scopes,
    nangoConnectionId,
    nangoCallbackUrl,
    nangoProviderConfigKey
  } = params

  const connection = await getNango().getConnection(
    nangoProviderConfigKey,
    nangoConnectionId
  )

  validateNangoConnectionOAuthScopes({ connection, scopes })

  const token = connection.credentials.raw
  assert(token)

  const oauthUser = new auth.OAuth2User({
    client_id: dummyTwitterClientId,
    callback: nangoCallbackUrl,
    scopes: scopes as any[],
    token
  })

  return oauthUser
}
/**
 * Creates a Twitter API v2 SDK client authenticated via a Nango connection.
 *
 * @param opts.scopes - OAuth scopes the Nango connection is validated against.
 * @param opts.nangoConnectionId - Nango connection id (defaults to the
 * `NANGO_CONNECTION_ID` env var).
 * @param opts.nangoCallbackUrl - OAuth callback URL (defaults to the
 * `NANGO_CALLBACK_URL` env var, then to Nango's hosted callback).
 * @param opts.nangoProviderConfigKey - Nango provider config key.
 */
export async function createTwitterV2Client({
  scopes = defaultTwitterOAuthScopes,
  nangoConnectionId = getEnv('NANGO_CONNECTION_ID'),
  nangoCallbackUrl = getEnv('NANGO_CALLBACK_URL') ??
    'https://api.nango.dev/oauth/callback',
  nangoProviderConfigKey = 'twitter-v2'
}: {
  scopes?: string[]
  nangoConnectionId?: string
  nangoCallbackUrl?: string
  nangoProviderConfigKey?: string
} = {}): Promise<TwitterV2Client> {
  assert(nangoConnectionId, 'twitter client missing nangoConnectionId')
  assert(nangoCallbackUrl, 'twitter client missing nangoCallbackUrl')

  // Twitter API v2 using OAuth 2.0. Nango takes care of refreshing the oauth
  // access token for us.
  return new TwitterV2Client(
    await createTwitterAuth({
      scopes,
      nangoConnectionId,
      nangoCallbackUrl,
      nangoProviderConfigKey
    })
  )
}

Wyświetl plik

@ -0,0 +1,32 @@
/** Categories of failures surfaced through `TwitterError`. */
export type TwitterErrorType =
  | 'twitter:forbidden'
  | 'twitter:auth'
  | 'twitter:rate-limit'
  | 'twitter:unknown'
  | 'network'

/**
 * Error carrying a failure category, a finality flag and, when available, an
 * HTTP-style status code.
 */
export class TwitterError extends Error {
  type: TwitterErrorType
  isFinal: boolean
  status?: number

  /**
   * @param message - Human-readable error message.
   * @param options - Error category plus optional `isFinal` (defaults to
   * `false`) and `status`; any remaining keys (e.g. `cause`) are forwarded to
   * the base `Error` constructor. When `status` is omitted, it is read from
   * `cause.status` if present.
   */
  constructor(
    message: string,
    options: ErrorOptions & {
      type: TwitterErrorType
      isFinal?: boolean
      status?: number
    }
  ) {
    const { type, isFinal = false, status, ...errorOptions } = options

    super(message, errorOptions)

    this.type = type
    this.isFinal = isFinal
    this.status = status ?? (errorOptions.cause as any)?.status
  }
}

Wyświetl plik

@ -0,0 +1,5 @@
export * from './client.js'
export * from './error.js'
export * from './twitter-client.js'
export type * from './types.js'
export * from './utils.js'

Wyświetl plik

@ -0,0 +1,453 @@
import pThrottle from 'p-throttle'
import { z } from 'zod'
import type * as types from './types.js'
import { aiFunction, AIFunctionsProvider } from '../../fns.js'
import { assert, getEnv } from '../../utils.js'
import { handleKnownTwitterErrors } from './utils.js'
/**
* This file contains rate-limited wrappers around all of the core Twitter API
* methods that this project uses.
*
* NOTE: Twitter has different API rate limits and quotas per plan, so in order
* to rate-limit effectively, our throttles need to either use the lowest common
* denominator OR vary based on the twitter developer plan you're using. We
* chose to go with the latter.
*
* @see https://developer.twitter.com/en/docs/twitter-api/rate-limits
*/
/** Names of the Twitter API methods that have a rate-limit entry below. */
type TwitterApiMethod =
| 'createTweet'
| 'usersIdMentions'
| 'findTweetById'
| 'findTweetsById'
| 'searchRecentTweets'
| 'findUserById'
| 'findUserByUsername'
// Rate-limit window lengths, in milliseconds.
const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000
const FIFTEEN_MINUTES_MS = 15 * 60 * 1000
/**
 * Rate limits per Twitter API plan and method, expressed as `limit` calls per
 * `interval` milliseconds. The inline comments record the per-user / per-app
 * figures the values were derived from.
 */
const twitterApiRateLimitsByPlan: Record<
types.TwitterApiPlan,
Record<
TwitterApiMethod,
{
readonly limit: number
readonly interval: number
}
>
> = {
free: {
// 50 per 24h per user
// 50 per 24h per app
createTweet: { limit: 50, interval: TWENTY_FOUR_HOURS_MS },
// TODO: according to the twitter docs, this shouldn't be allowed on the
// free plan, but it seems to work...
usersIdMentions: { limit: 1, interval: FIFTEEN_MINUTES_MS },
findTweetById: { limit: 1, interval: FIFTEEN_MINUTES_MS },
findTweetsById: { limit: 1, interval: FIFTEEN_MINUTES_MS },
searchRecentTweets: { limit: 1, interval: FIFTEEN_MINUTES_MS },
findUserById: { limit: 1, interval: FIFTEEN_MINUTES_MS },
findUserByUsername: { limit: 1, interval: FIFTEEN_MINUTES_MS }
},
basic: {
// 100 per 24h per user
// 1667 per 24h per app
createTweet: { limit: 100, interval: TWENTY_FOUR_HOURS_MS },
// https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-mentions
// TODO: undocumented
// 180 per 15m per user
// 450 per 15m per app
usersIdMentions: { limit: 180, interval: FIFTEEN_MINUTES_MS },
// 15 per 15m per user
// 15 per 15m per app
findTweetById: { limit: 15, interval: FIFTEEN_MINUTES_MS },
findTweetsById: { limit: 15, interval: FIFTEEN_MINUTES_MS },
// 60 per 15m per user
// 60 per 15m per app
searchRecentTweets: { limit: 60, interval: FIFTEEN_MINUTES_MS },
findUserById: { limit: 100, interval: TWENTY_FOUR_HOURS_MS },
findUserByUsername: { limit: 100, interval: TWENTY_FOUR_HOURS_MS }
},
pro: {
// 100 per 15m per user
// 10k per 24h per app
createTweet: { limit: 100, interval: FIFTEEN_MINUTES_MS },
// 180 per 15m per user
// 450 per 15m per app
usersIdMentions: { limit: 180, interval: FIFTEEN_MINUTES_MS },
// TODO: why would the per-user rate-limit be less than the per-app one?!
// 900 per 15m per user
// 450 per 15m per app
// (the configured limit matches the lower, per-app figure)
findTweetById: { limit: 450, interval: FIFTEEN_MINUTES_MS },
findTweetsById: { limit: 450, interval: FIFTEEN_MINUTES_MS },
// TODO: why would the per-user rate-limit be less than the per-app one?!
// 456 per 15m per user
// 300 per 15m per app
// (the configured limit matches the lower, per-app figure)
searchRecentTweets: { limit: 300, interval: FIFTEEN_MINUTES_MS },
findUserById: { limit: 300, interval: FIFTEEN_MINUTES_MS },
findUserByUsername: { limit: 300, interval: FIFTEEN_MINUTES_MS }
},
enterprise: {
// NOTE: these are just placeholders; the enterprise plan seems to be
// completely customizable, but it's still useful to define rate limits
// for robustness. These values just 10x those of the pro plan.
createTweet: { limit: 1000, interval: FIFTEEN_MINUTES_MS },
usersIdMentions: { limit: 1800, interval: FIFTEEN_MINUTES_MS },
findTweetById: { limit: 4500, interval: FIFTEEN_MINUTES_MS },
findTweetsById: { limit: 4500, interval: FIFTEEN_MINUTES_MS },
searchRecentTweets: { limit: 3000, interval: FIFTEEN_MINUTES_MS },
findUserById: { limit: 3000, interval: FIFTEEN_MINUTES_MS },
findUserByUsername: { limit: 3000, interval: FIFTEEN_MINUTES_MS }
}
}
/**
 * Rate-limited wrapper around the Twitter API v2 SDK which exposes a handful
 * of methods as AI functions.
 *
 * Each underlying SDK call is wrapped in its own `p-throttle` throttle,
 * configured from the rate limits of the selected Twitter API plan.
 */
export class TwitterClient extends AIFunctionsProvider {
  readonly client: types.TwitterV2Client
  readonly twitterApiPlan: types.TwitterApiPlan

  // Throttled implementations; assigned once in the constructor.
  protected _createTweet: ReturnType<typeof createTweetImpl>
  protected _findTweetById: ReturnType<typeof findTweetByIdImpl>
  protected _findTweetsById: ReturnType<typeof findTweetsByIdImpl>
  protected _searchRecentTweets: ReturnType<typeof searchRecentTweetsImpl>
  protected _findUserById: ReturnType<typeof findUserByIdImpl>
  protected _findUserByUsername: ReturnType<typeof findUserByUsernameImpl>

  /**
   * @param opts.client - Initialized `twitter-api-sdk` v2 client.
   * @param opts.twitterApiPlan - Twitter API plan used to pick rate limits
   * (defaults to the `TWITTER_API_PLAN` env var, then to `'free'`).
   */
  constructor({
    client,
    twitterApiPlan = (getEnv('TWITTER_API_PLAN') as types.TwitterApiPlan) ??
      'free'
  }: {
    client: types.TwitterV2Client
    twitterApiPlan?: types.TwitterApiPlan
  }) {
    // The message names `createTwitterV2Client`, the factory exported by this
    // package's `client.ts`.
    assert(
      client,
      'TwitterClient missing required "client" which should be an instance of "twitter-api-sdk" (use `createTwitterV2Client` to initialize the underlying V2 Twitter SDK using Nango OAuth)'
    )
    assert(twitterApiPlan, 'TwitterClient missing required "twitterApiPlan"')

    super()

    this.client = client
    this.twitterApiPlan = twitterApiPlan

    // The plan may come from an unchecked env var cast, so validate the lookup.
    const twitterApiRateLimits = twitterApiRateLimitsByPlan[twitterApiPlan]!
    assert(twitterApiRateLimits, `Invalid twitter api plan: ${twitterApiPlan}`)

    const createTweetThrottle = pThrottle(twitterApiRateLimits.createTweet)
    const findTweetByIdThrottle = pThrottle(twitterApiRateLimits.findTweetById)
    const findTweetsByIdThrottle = pThrottle(
      twitterApiRateLimits.findTweetsById
    )
    const searchRecentTweetsThrottle = pThrottle(
      twitterApiRateLimits.searchRecentTweets
    )
    const findUserByIdThrottle = pThrottle(twitterApiRateLimits.findUserById)
    const findUserByUsernameThrottle = pThrottle(
      twitterApiRateLimits.findUserByUsername
    )

    this._createTweet = createTweetThrottle(createTweetImpl(this.client))
    this._findTweetById = findTweetByIdThrottle(findTweetByIdImpl(this.client))
    this._findTweetsById = findTweetsByIdThrottle(
      findTweetsByIdImpl(this.client)
    )
    this._searchRecentTweets = searchRecentTweetsThrottle(
      searchRecentTweetsImpl(this.client)
    )
    this._findUserById = findUserByIdThrottle(findUserByIdImpl(this.client))
    this._findUserByUsername = findUserByUsernameThrottle(
      findUserByUsernameImpl(this.client)
    )
  }

  /**
   * Creates a new tweet.
   */
  @aiFunction({
    name: 'create_tweet',
    description: 'Creates a new tweet',
    inputSchema: z.object({
      text: z.string().min(1)
    })
  })
  async createTweet(
    params: types.CreateTweetParams
  ): Promise<types.CreatedTweet> {
    return this._createTweet(params)
  }

  /**
   * Fetches a tweet by its ID. Asserts that the plan is not `'free'`.
   */
  @aiFunction({
    name: 'get_tweet_by_id',
    description: 'Fetch a tweet by its ID',
    inputSchema: z.object({
      id: z.string().min(1)
    })
  })
  async findTweetById({
    id,
    ...params
  }: { id: string } & types.FindTweetByIdParams) {
    assert(
      this.twitterApiPlan !== 'free',
      'TwitterClient.findTweetById not supported on free plan'
    )

    return this._findTweetById(id, params)
  }

  /**
   * Fetches multiple tweets by their IDs. Asserts that the plan is not `'free'`.
   */
  @aiFunction({
    name: 'get_tweets_by_id',
    description: 'Fetch an array of tweets by their IDs',
    inputSchema: z.object({
      ids: z.array(z.string().min(1))
    })
  })
  async findTweetsById({ ids, ...params }: types.FindTweetsByIdParams) {
    assert(
      this.twitterApiPlan !== 'free',
      'TwitterClient.findTweetsById not supported on free plan'
    )

    return this._findTweetsById(ids, params)
  }

  /**
   * Searches for recent tweets. Asserts that the plan is not `'free'`.
   */
  @aiFunction({
    name: 'search_recent_tweets',
    description: 'Searches for recent tweets',
    inputSchema: z.object({
      query: z.string().min(1),
      sort_order: z
        .enum(['recency', 'relevancy'])
        .default('relevancy')
        .optional()
    })
  })
  async searchRecentTweets(params: types.SearchRecentTweetsParams) {
    assert(
      this.twitterApiPlan !== 'free',
      'TwitterClient.searchRecentTweets not supported on free plan'
    )

    return this._searchRecentTweets(params)
  }

  /**
   * Fetches a twitter user by ID. Asserts that the plan is not `'free'`.
   */
  @aiFunction({
    name: 'get_twitter_user_by_id',
    description: 'Fetch a twitter user by ID',
    inputSchema: z.object({
      id: z.string().min(1)
    })
  })
  async findUserById({
    id,
    ...params
  }: { id: string } & types.FindUserByIdParams) {
    assert(
      this.twitterApiPlan !== 'free',
      'TwitterClient.findUserById not supported on free plan'
    )

    return this._findUserById(id, params)
  }

  /**
   * Fetches a twitter user by username. Asserts that the plan is not `'free'`.
   */
  @aiFunction({
    name: 'get_twitter_user_by_username',
    description: 'Fetch a twitter user by username',
    inputSchema: z.object({
      username: z.string().min(1)
    })
  })
  async findUserByUsername({
    username,
    ...params
  }: { username: string } & types.FindUserByUsernameParams) {
    assert(
      this.twitterApiPlan !== 'free',
      'TwitterClient.findUserByUsername not supported on free plan'
    )

    return this._findUserByUsername(username, params)
  }
}
/**
 * Tweet fields requested by default. Used as the `tweet.fields` value of both
 * `defaultTweetQueryParams` and `defaultUserQueryParams`.
 *
 * The commented-out entries are fields that are intentionally not requested;
 * the reason is noted next to each one.
 */
const defaultTwitterQueryTweetFields: types.TwitterQueryTweetFields = [
'attachments',
'author_id',
'conversation_id',
'created_at',
'entities',
'geo',
'id',
'in_reply_to_user_id',
'lang',
'public_metrics',
'possibly_sensitive',
'referenced_tweets',
'text'
// 'context_annotations', // not needed (way too verbose and noisy)
// 'edit_controls', // not needed
// 'non_public_metrics', // don't have access to
// 'organic_metrics', // don't have access to
// 'promoted_metrics', // don't have access to
// 'reply_settings', // not needed
// 'source', // not needed
// 'withheld' // not needed
]
/**
 * User fields requested by default. Used as the `user.fields` value of both
 * `defaultTweetQueryParams` and `defaultUserQueryParams`.
 *
 * The commented-out entries at the end are fields that are not requested.
 */
const defaultTwitterQueryUserFields: types.TwitterQueryUserFields = [
'created_at',
'description',
'entities',
'id',
'location',
'name',
'pinned_tweet_id',
'profile_image_url',
'protected',
'public_metrics',
'url',
'username',
'verified'
// 'most_recent_tweet_id',
// 'verified_type',
// 'withheld'
]
/**
 * Default query options for the tweet endpoints. Spread first into the request
 * options built by `findTweetByIdImpl`, `findTweetsByIdImpl` and
 * `searchRecentTweetsImpl`, so caller-supplied params override these values.
 */
const defaultTweetQueryParams: types.TweetsQueryOptions = {
// https://developer.twitter.com/en/docs/twitter-api/expansions
expansions: [
'author_id',
'in_reply_to_user_id',
'referenced_tweets.id',
'referenced_tweets.id.author_id',
'entities.mentions.username',
// TODO
'attachments.media_keys',
'geo.place_id',
'attachments.poll_ids'
],
'tweet.fields': defaultTwitterQueryTweetFields,
'user.fields': defaultTwitterQueryUserFields
}
/**
 * Default query options for the user endpoints. Spread first into the request
 * options built by `findUserByIdImpl` and `findUserByUsernameImpl`, so
 * caller-supplied params override these values.
 */
const defaultUserQueryParams: types.TwitterUserQueryOptions = {
// https://developer.twitter.com/en/docs/twitter-api/expansions
expansions: ['pinned_tweet_id'],
'tweet.fields': defaultTwitterQueryTweetFields,
'user.fields': defaultTwitterQueryUserFields
}
/**
 * Builds the tweet-creation function bound to `client`.
 *
 * The returned function resolves with the `data` payload of the SDK response
 * and throws when that payload has no `id`. Every failure is logged to the
 * console and passed to `handleKnownTwitterErrors` before being re-thrown.
 */
function createTweetImpl(client: types.TwitterV2Client) {
  return async (
    params: types.CreateTweetParams
  ): Promise<types.CreatedTweet> => {
    try {
      const { data: createdTweet } = await client.tweets.createTweet(params)

      if (createdTweet?.id) {
        return createdTweet
      }

      throw new Error('invalid createTweet response')
    } catch (err: any) {
      const serializedError = JSON.stringify(err, null, 2)
      console.error('error creating tweet', serializedError)

      handleKnownTwitterErrors(err, { label: 'creating tweet' })
      throw err
    }
  }
}
/**
 * Builds the single-tweet lookup function bound to `client`.
 *
 * The returned function merges `params` over `defaultTweetQueryParams` and
 * passes any failure to `handleKnownTwitterErrors` before re-throwing it.
 */
function findTweetByIdImpl(client: types.TwitterV2Client) {
  return async (tweetId: string, params?: types.FindTweetByIdParams) => {
    try {
      const query = { ...defaultTweetQueryParams, ...params }
      const response = await client.tweets.findTweetById(tweetId, query)
      return response
    } catch (err: any) {
      const label = `fetching tweet ${tweetId}`
      handleKnownTwitterErrors(err, { label })
      throw err
    }
  }
}
/**
 * Builds the multi-tweet lookup function bound to `client`.
 *
 * The returned function merges `params` over `defaultTweetQueryParams`, always
 * sets `ids` last, and passes any failure to `handleKnownTwitterErrors` before
 * re-throwing it.
 */
function findTweetsByIdImpl(client: types.TwitterV2Client) {
  return async (
    ids: string[],
    params?: Omit<types.FindTweetsByIdParams, 'ids'>
  ) => {
    try {
      const query = { ...defaultTweetQueryParams, ...params, ids }
      const response = await client.tweets.findTweetsById(query)
      return response
    } catch (err: any) {
      const label = `fetching ${ids.length} tweets`
      handleKnownTwitterErrors(err, { label })
      throw err
    }
  }
}
/**
 * Builds the recent-tweet search function bound to `client`.
 *
 * The returned function merges `params` over `defaultTweetQueryParams` and
 * passes any failure to `handleKnownTwitterErrors` before re-throwing it.
 */
function searchRecentTweetsImpl(client: types.TwitterV2Client) {
  return async (params: types.SearchRecentTweetsParams) => {
    try {
      const query = { ...defaultTweetQueryParams, ...params }
      const response = await client.tweets.tweetsRecentSearch(query)
      return response
    } catch (err: any) {
      const label = `searching tweets query "${params.query}"`
      handleKnownTwitterErrors(err, { label })
      throw err
    }
  }
}
/**
 * Builds the user-by-ID lookup function bound to `client`.
 *
 * The returned function merges `params` over `defaultUserQueryParams` and
 * passes any failure to `handleKnownTwitterErrors` before re-throwing it.
 */
function findUserByIdImpl(client: types.TwitterV2Client) {
  return async (userId: string, params?: types.FindUserByIdParams) => {
    try {
      const query = { ...defaultUserQueryParams, ...params }
      const response = await client.users.findUserById(userId, query)
      return response
    } catch (err: any) {
      const label = `fetching user with id ${userId}`
      handleKnownTwitterErrors(err, { label })
      throw err
    }
  }
}
/**
 * Builds the user-by-username lookup function bound to `client`.
 *
 * The returned function merges `params` over `defaultUserQueryParams` and
 * passes any failure to `handleKnownTwitterErrors` before re-throwing it.
 */
function findUserByUsernameImpl(client: types.TwitterV2Client) {
  return async (username: string, params?: types.FindUserByUsernameParams) => {
    try {
      const query = { ...defaultUserQueryParams, ...params }
      const response = await client.users.findUserByUsername(username, query)
      return response
    } catch (err: any) {
      const label = `fetching user with username ${username}`
      handleKnownTwitterErrors(err, { label })
      throw err
    }
  }
}

Wyświetl plik

@ -0,0 +1,75 @@
import type { AsyncReturnType, Simplify } from 'type-fest'
import { type Client as TwitterV2Client } from 'twitter-api-sdk'
export { type Client as TwitterV2Client } from 'twitter-api-sdk'
/** Union of Twitter API plan names. */
export type TwitterApiPlan = 'free' | 'basic' | 'pro' | 'enterprise'
/**
 * The `expansions`, `tweet.fields` and `user.fields` options taken from the
 * first parameter of the SDK's `tweets.findTweetsById`.
 */
export type TweetsQueryOptions = Simplify<
Pick<
Parameters<TwitterV2Client['tweets']['findTweetsById']>[0],
'expansions' | 'tweet.fields' | 'user.fields'
>
>
/**
 * The `expansions`, `tweet.fields` and `user.fields` options taken from the
 * second parameter of the SDK's `users.findUserById`.
 */
export type TwitterUserQueryOptions = Simplify<
Pick<
NonNullable<Parameters<TwitterV2Client['users']['findUserById']>[1]>,
'expansions' | 'tweet.fields' | 'user.fields'
>
>
/** Type of the `tweet.fields` option of `TweetsQueryOptions`. */
export type TwitterQueryTweetFields = TweetsQueryOptions['tweet.fields']
/** Type of the `user.fields` option; note it is derived from `TweetsQueryOptions`, not `TwitterUserQueryOptions`. */
export type TwitterQueryUserFields = TweetsQueryOptions['user.fields']
/** Non-nullable second parameter of the SDK's `tweets.usersIdMentions`. */
export type TwitterUserIdMentionsQueryOptions = Simplify<
NonNullable<Parameters<TwitterV2Client['tweets']['usersIdMentions']>[1]>
>
/** First parameter of the SDK's `tweets.createTweet`. */
export type CreateTweetParams = Simplify<
Parameters<TwitterV2Client['tweets']['createTweet']>[0]
>
/** Second parameter of the SDK's `tweets.usersIdMentions` (not wrapped in `NonNullable`, unlike `TwitterUserIdMentionsQueryOptions`). */
export type UsersIdMentionsParams = Simplify<
Parameters<TwitterV2Client['tweets']['usersIdMentions']>[1]
>
/** Second parameter of the SDK's `tweets.findTweetById`. */
export type FindTweetByIdParams = Simplify<
Parameters<TwitterV2Client['tweets']['findTweetById']>[1]
>
/** First parameter of the SDK's `tweets.findTweetsById`. */
export type FindTweetsByIdParams = Simplify<
Parameters<TwitterV2Client['tweets']['findTweetsById']>[0]
>
/** First parameter of the SDK's `tweets.tweetsRecentSearch`. */
export type SearchRecentTweetsParams = Simplify<
Parameters<TwitterV2Client['tweets']['tweetsRecentSearch']>[0]
>
/** Second parameter of the SDK's `users.findUserById`. */
export type FindUserByIdParams = Simplify<
Parameters<TwitterV2Client['users']['findUserById']>[1]
>
/** Second parameter of the SDK's `users.findUserByUsername`. */
export type FindUserByUsernameParams = Simplify<
Parameters<TwitterV2Client['users']['findUserByUsername']>[1]
>
/** Element type of `T` when `T` is an array type; otherwise `T` itself. */
type Unpacked<T> = T extends (infer U)[] ? U : T
/** A single element of the `data` array resolved by the SDK's `tweets.findTweetsById`. */
export type Tweet = Simplify<
NonNullable<
Unpacked<
AsyncReturnType<TwitterV2Client['tweets']['findTweetsById']>['data']
>
>
>
/** Non-nullable `data` payload resolved by the SDK's `users.findMyUser`. */
export type TwitterUser = Simplify<
NonNullable<AsyncReturnType<TwitterV2Client['users']['findMyUser']>['data']>
>
/** Non-nullable `data` payload resolved by the SDK's `tweets.createTweet`. */
export type CreatedTweet = Simplify<
NonNullable<AsyncReturnType<TwitterV2Client['tweets']['createTweet']>['data']>
>
/** A single element of a tweet's `entities.urls` array. */
export type TwitterUrl = Simplify<
Unpacked<NonNullable<NonNullable<Tweet['entities']>['urls']>>
>

Wyświetl plik

@ -0,0 +1,140 @@
import type * as types from './types.js'
import { omit } from '../../utils.js'
import { TwitterError } from './error.js'
/**
 * Inspects an unknown error and converts it into a structured `TwitterError`
 * for a set of common Twitter API failures.
 *
 * This function always throws and never returns normally: it raises either a
 * `TwitterError` wrapping `err` (as `cause`) or, when nothing matches, `err`
 * itself.
 *
 * @param err - The error to classify; its `status` and `error` properties are inspected.
 * @param options - Optional settings; `label` is a short description of the
 * attempted operation that is embedded in the generated messages.
 */
export function handleKnownTwitterErrors(
  err: any,
  { label = '' }: { label?: string } = {}
) {
  switch (err.status) {
    case 403:
      // user may have deleted the tweet we're trying to respond to
      throw new TwitterError(
        err.error?.detail || `error ${label}: 403 forbidden`,
        {
          type: 'twitter:forbidden',
          isFinal: true,
          cause: err
        }
      )

    case 401:
      throw new TwitterError(`error ${label}: unauthorized`, {
        type: 'twitter:auth',
        cause: err
      })

    case 400: {
      const isInvalidToken = /value passed for the token was invalid/i.test(
        err.error?.error_description
      )
      if (isInvalidToken) {
        throw new TwitterError(`error ${label}: invalid auth token`, {
          type: 'twitter:auth',
          cause: err
        })
      }

      // Any other 400 is handled by the generic 4xx branch below.
      break
    }

    case 429:
      throw new TwitterError(`error ${label}: too many requests`, {
        type: 'twitter:rate-limit',
        cause: err
      })

    case 404:
      // Note: 404 is reported with the `twitter:forbidden` type.
      throw new TwitterError(err.toString(), {
        type: 'twitter:forbidden',
        isFinal: true,
        cause: err
      })
  }

  if (err.status >= 400) {
    throw new TwitterError(
      `error ${label}: ${err.status} ${
        err.error?.description || err.toString()
      }`,
      {
        type: 'twitter:unknown',
        // Remaining 4xx statuses are final; 5xx statuses are not.
        isFinal: err.status < 500,
        cause: err
      }
    )
  }

  const reason = err.toString().toLowerCase()
  const looksLikeNetworkError =
    reason.includes('fetcherror') || reason.includes('enotfound')
  if (looksLikeNetworkError) {
    throw new TwitterError(err.toString(), {
      type: 'network',
      cause: err
    })
  }

  // Otherwise, propagate the original error
  throw err
}
/**
 * Returns a slimmed-down copy of `tweet`.
 *
 * Every shortened URL in the text that has both `url` and `expanded_url` in
 * `entities.urls` is replaced by its expanded form, and the
 * `conversation_id`, `public_metrics`, `created_at`, `entities` and
 * `possibly_sensitive` properties are dropped.
 */
export function getPrunedTweet(
  tweet: Partial<types.Tweet>
): Partial<types.Tweet> {
  const originalText = tweet.text
  const urls = tweet.entities?.urls

  let text = originalText
  if (originalText && urls) {
    text = urls.reduce(
      (expanded, { url, expanded_url }) =>
        url && expanded_url ? expanded.replaceAll(url, expanded_url) : expanded,
      originalText
    )
  }

  const pruned = omit(
    tweet,
    'conversation_id',
    'public_metrics',
    'created_at',
    'entities',
    'possibly_sensitive'
  )

  return { ...pruned, text }
}
/**
 * Returns a slimmed-down copy of `twitterUser`.
 *
 * Every shortened URL in the description that has both `url` and
 * `expanded_url` in `entities.description.urls` is replaced by its expanded
 * form, and the `public_metrics`, `created_at`, `verified`, `protected`,
 * `url` and `entities` properties are dropped.
 */
export function getPrunedTwitterUser(
  twitterUser: Partial<types.TwitterUser>
): Partial<types.TwitterUser> {
  const originalDescription = twitterUser.description
  const urls = twitterUser.entities?.description?.urls

  let description = originalDescription
  if (originalDescription && urls) {
    description = urls.reduce(
      (expanded, { url, expanded_url }) =>
        url && expanded_url ? expanded.replaceAll(url, expanded_url) : expanded,
      originalDescription
    )
  }

  const pruned = omit(
    twitterUser,
    'public_metrics',
    'created_at',
    'verified',
    'protected',
    'url',
    'entities'
  )

  return { ...pruned, description }
}

Wyświetl plik

@ -0,0 +1,134 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv } from '../utils.js'
/**
 * Constants and response types for the weather API used by `WeatherClient`.
 */
export namespace weatherapi {
// Default API base URL; `WeatherClient` uses it when no `apiBaseUrl` is given.
export const BASE_URL = 'https://api.weatherapi.com/v1'
/** Response type that `WeatherClient.getCurrentWeather` parses `current.json` into. */
export interface CurrentWeatherResponse {
current: CurrentWeather
location: WeatherLocation
}
/** The `current` part of `CurrentWeatherResponse`. */
export interface CurrentWeather {
cloud: number
condition: WeatherCondition
feelslike_c: number
feelslike_f: number
gust_kph: number
gust_mph: number
humidity: number
is_day: number
last_updated: string
last_updated_epoch: number
precip_in: number
precip_mm: number
pressure_in: number
pressure_mb: number
temp_c: number
temp_f: number
uv: number
vis_km: number
vis_miles: number
wind_degree: number
wind_dir: string
wind_kph: number
wind_mph: number
}
/** The `condition` part of `CurrentWeather`. */
export interface WeatherCondition {
code: number
icon: string
text: string
}
/** The `location` part of `CurrentWeatherResponse`. */
export interface WeatherLocation {
country: string
lat: number
localtime: string
localtime_epoch: number
lon: number
name: string
region: string
tz_id: string
}
/**
 * IP-info response shape.
 *
 * NOTE(review): not referenced by the `WeatherClient` methods in this file —
 * presumably matches an IP-lookup endpoint; confirm against the API docs.
 */
export interface WeatherIPInfoResponse {
ip: string
type: string
continent_code: string
continent_name: string
country_code: string
country_name: string
is_eu: string
geoname_id: number
city: string
region: string
lat: number
lon: number
tz_id: string
localtime_epoch: number
localtime: string
}
}
/**
 * Small client for the weather API that exposes current-weather lookups by
 * location as an AI function.
 *
 * @see https://www.weatherapi.com
 */
export class WeatherClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiKey: string
  protected readonly apiBaseUrl: string

  /**
   * @param options - Optional settings. `apiKey` falls back to the
   * `WEATHER_API_KEY` environment variable and is required; `apiBaseUrl`
   * falls back to `weatherapi.BASE_URL`; `ky` falls back to the default ky
   * instance.
   */
  constructor({
    apiKey = getEnv('WEATHER_API_KEY'),
    apiBaseUrl = weatherapi.BASE_URL,
    ky = defaultKy
  }: {
    apiKey?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      apiKey,
      'WeatherClient missing required "apiKey" (defaults to "WEATHER_API_KEY")'
    )
    super()

    // All requests are issued relative to the configured base URL.
    const client = ky.extend({ prefixUrl: apiBaseUrl })

    this.ky = client
    this.apiBaseUrl = apiBaseUrl
    this.apiKey = apiKey
  }

  /**
   * Fetches the current weather for a location.
   *
   * @param queryOrOptions - Either the location query string itself or an
   * object carrying it as `q`.
   * @returns The parsed `current.json` response.
   */
  @aiFunction({
    name: 'get_current_weather',
    description: 'Gets info about the current weather at a given location.',
    inputSchema: z.object({
      q: z
        .string()
        .describe(
          'Location to get the weather for. Can be a city name, zipcode, IP address, or lat/lng coordinates. Example: "London"'
        )
    })
  })
  async getCurrentWeather(queryOrOptions: string | { q: string }) {
    const options =
      typeof queryOrOptions !== 'string'
        ? queryOrOptions
        : { q: queryOrOptions }

    const searchParams = {
      key: this.apiKey,
      ...options
    }

    return this.ky
      .get('current.json', { searchParams })
      .json<weatherapi.CurrentWeatherResponse>()
  }
}

Wyświetl plik

@ -0,0 +1,187 @@
import defaultKy, { type KyInstance } from 'ky'
import pThrottle from 'p-throttle'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv, throttleKy } from '../utils.js'
/**
 * Throttle configuration plus option and response types used by
 * `WikipediaClient`.
 */
export namespace wikipedia {
// Allow up to 200 requests per second by default.
export const throttle = pThrottle({
limit: 200,
interval: 1000
})
/** Options accepted by `WikipediaClient.search`; everything except `query` is forwarded as a search param. */
export interface SearchOptions {
query: string
limit?: number
}
/** Response type that `WikipediaClient.search` parses its result into. */
export interface PageSearchResponse {
pages: Page[]
}
/** A single entry of `PageSearchResponse.pages`. */
export interface Page {
id: number
key: string
title: string
// NOTE(review): typed as `null` only — confirm whether the API can return a string here.
matched_title: null
excerpt: string
description: null | string
thumbnail: Thumbnail | null
}
/** Thumbnail attached to a search result `Page`. */
export interface Thumbnail {
url: string
width: number
height: number
mimetype: string
// NOTE(review): typed as `null` only — confirm against the API response.
duration: null
}
/**
 * Options accepted by `WikipediaClient.getPageSummary`. `acceptLanguage` is
 * sent as the `accept-language` header and `redirect` as a search param.
 */
export interface PageSummaryOptions {
title: string
redirect?: boolean
acceptLanguage?: string
}
/** Response type that `WikipediaClient.getPageSummary` parses its result into. */
export interface PageSummary {
ns?: number
index?: number
type: string
title: string
displaytitle: string
namespace: { id: number; text: string }
wikibase_item: string
titles: { canonical: string; normalized: string; display: string }
pageid: number
thumbnail: {
source: string
width: number
height: number
}
originalimage: {
source: string
width: number
height: number
}
lang: string
dir: string
revision: string
tid: string
timestamp: string
description: string
description_source: string
content_urls: {
desktop: {
page: string
revisions: string
edit: string
talk: string
}
mobile: {
page: string
revisions: string
edit: string
talk: string
}
}
extract: string
extract_html: string
normalizedtitle?: string
coordinates?: {
lat: number
lon: number
}
}
}
/**
 * Basic Wikipedia API client for searching wiki pages and resolving page data.
 *
 * @see https://www.mediawiki.org/wiki/API
 */
export class WikipediaClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly apiBaseUrl: string
  protected readonly apiUserAgent: string

  /**
   * @param options - Optional settings. `apiBaseUrl` falls back to the
   * `WIKIPEDIA_API_BASE_URL` environment variable and then to the English
   * Wikipedia REST endpoint; `apiUserAgent` falls back to
   * `WIKIPEDIA_API_USER_AGENT` and then to a project default; `throttle`
   * (default `true`) wraps the ky instance with `wikipedia.throttle`; `ky`
   * falls back to the default ky instance.
   */
  constructor({
    apiBaseUrl = getEnv('WIKIPEDIA_API_BASE_URL') ??
      'https://en.wikipedia.org/api/rest_v1',
    apiUserAgent = getEnv('WIKIPEDIA_API_USER_AGENT') ??
      'Agentic (https://github.com/transitive-bullshit/agentic)',
    throttle = true,
    ky = defaultKy
  }: {
    apiBaseUrl?: string
    apiUserAgent?: string
    throttle?: boolean
    ky?: KyInstance
  } = {}) {
    assert(apiBaseUrl, 'WikipediaClient missing required "apiBaseUrl"')
    assert(apiUserAgent, 'WikipediaClient missing required "apiUserAgent"')
    super()

    this.apiBaseUrl = apiBaseUrl
    this.apiUserAgent = apiUserAgent

    const throttledKy = throttle ? throttleKy(ky, wikipedia.throttle) : ky

    // Every request identifies the caller through the `api-user-agent` header.
    this.ky = throttledKy.extend({
      headers: {
        'api-user-agent': apiUserAgent
      }
    })
  }

  /**
   * Searches Wikipedia pages.
   *
   * `query` is sent as the `q` search param; any other option (e.g. `limit`)
   * is forwarded as-is. Note that this call uses a fixed English-Wikipedia URL
   * rather than `apiBaseUrl`.
   */
  @aiFunction({
    name: 'wikipedia_search',
    description: 'Searches Wikipedia for pages matching the given query.',
    inputSchema: z.object({
      query: z.string().describe('Search query')
    })
  })
  async search({ query, ...opts }: wikipedia.SearchOptions) {
    return (
      // https://www.mediawiki.org/wiki/API:REST_API
      this.ky
        .get('https://en.wikipedia.org/w/rest.php/v1/search/page', {
          searchParams: { q: query, ...opts }
        })
        .json<wikipedia.PageSearchResponse>()
    )
  }

  /**
   * Fetches the summary of a Wikipedia page.
   *
   * @param options - `title` is trimmed, its spaces are replaced by
   * underscores, and the result is percent-encoded before being placed in the
   * URL path; `acceptLanguage` (default `'en-us'`) is sent as the
   * `accept-language` header; `redirect` (default `true`) and any remaining
   * options are sent as search params.
   */
  @aiFunction({
    name: 'wikipedia_get_page_summary',
    description: 'Gets a summary of the given Wikipedia page.',
    inputSchema: z.object({
      title: z.string().describe('Wikipedia page title'),
      acceptLanguage: z
        .string()
        .optional()
        .default('en-us')
        .describe('Locale code for the language to use.')
    })
  })
  async getPageSummary({
    title,
    acceptLanguage = 'en-us',
    redirect = true,
    ...opts
  }: wikipedia.PageSummaryOptions) {
    // The title is a single path segment: without percent-encoding, titles
    // containing `/`, `?`, `#` or `%` (e.g. "AC/DC", "C#") would be parsed as
    // extra path segments, a query string or a fragment and hit the wrong URL.
    const encodedTitle = encodeURIComponent(title.trim().replaceAll(' ', '_'))

    // https://en.wikipedia.org/api/rest_v1/
    return this.ky
      .get(`page/summary/${encodedTitle}`, {
        prefixUrl: this.apiBaseUrl,
        searchParams: { redirect, ...opts },
        headers: {
          'accept-language': acceptLanguage
        }
      })
      .json<wikipedia.PageSummary>()
  }
}

Wyświetl plik

@ -0,0 +1,90 @@
import defaultKy, { type KyInstance } from 'ky'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { assert, getEnv } from '../utils.js'
/**
 * Constants, input schema and option type used by `WolframAlphaClient`.
 */
export namespace wolframalpha {
// Default API base URL; `WolframAlphaClient` uses it when no `apiBaseUrl` is given.
export const API_BASE_URL = 'https://www.wolframalpha.com/api/'
// Input schema of the `ask_wolfram_alpha` AI function.
// NOTE(review): `.default(6000)` is wrapped by `.optional()` — confirm the
// default is actually applied when `maxchars` is omitted.
export const AskWolframAlphaOptionsSchema = z.object({
input: z.string().describe('english query'),
maxchars: z
.number()
.int()
.positive()
.default(6000)
.optional()
.describe('max characters to generate in the response')
})
/** Options object type inferred from `AskWolframAlphaOptionsSchema`. */
export type AskWolframAlphaOptions = z.infer<
typeof AskWolframAlphaOptionsSchema
>
}
/**
 * Client for the Wolfram Alpha LLM API, used to answer computational,
 * mathematical, and scientific questions.
 *
 * @see https://products.wolframalpha.com/llm-api/documentation
 */
export class WolframAlphaClient extends AIFunctionsProvider {
  protected readonly ky: KyInstance
  protected readonly appId: string
  protected readonly apiBaseUrl: string

  /**
   * @param options - Optional settings. `appId` falls back to the
   * `WOLFRAM_APP_ID` environment variable and is required; `apiBaseUrl` falls
   * back to `wolframalpha.API_BASE_URL`; `ky` falls back to the default ky
   * instance.
   */
  constructor({
    appId = getEnv('WOLFRAM_APP_ID'),
    apiBaseUrl = wolframalpha.API_BASE_URL,
    ky = defaultKy
  }: {
    appId?: string
    apiBaseUrl?: string
    ky?: KyInstance
  } = {}) {
    assert(
      appId,
      'WolframAlphaClient missing required "appId" (defaults to "WOLFRAM_APP_ID")'
    )
    super()

    // Requests are issued relative to the base URL and carry the app ID as a
    // bearer token.
    const client = ky.extend({
      prefixUrl: apiBaseUrl,
      headers: {
        Authorization: `Bearer ${appId}`
      }
    })

    this.ky = client
    this.apiBaseUrl = apiBaseUrl
    this.appId = appId
  }

  /**
   * Sends a query to the `v1/llm-api` endpoint and resolves with the response
   * body as text.
   *
   * @param queryOrOptions - Either the query string itself or an options
   * object carrying it as `input`.
   */
  @aiFunction({
    name: 'ask_wolfram_alpha',
    description: `
- WolframAlpha understands natural language queries about entities in chemistry, physics, geography, history, art, astronomy, and more.
- WolframAlpha performs mathematical calculations, date and unit conversions, formula solving, etc.
- Convert inputs to simplified keyword queries whenever possible (e.g. convert "how many people live in France" to "France population").
- Send queries in English only; translate non-English queries before sending, then respond in the original language.
- ALWAYS use this exponent notation: \`6*10^14\`, NEVER \`6e14\`.
- ALWAYS use proper Markdown formatting for all math, scientific, and chemical formulas, symbols, etc.: '$$\n[expression]\n$$' for standalone cases and '( [expression] )' when inline.
- Use ONLY single-letter variable names, with or without integer subscript (e.g., n, n1, n_1).
- Use named physical constants (e.g., 'speed of light') without numerical substitution.
- Include a space between compound units (e.g., "Ω m" for "ohm*meter").
- To solve for a variable in an equation with units, consider solving a corresponding equation without units; exclude counting units (e.g., books), include genuine units (e.g., kg).
- If a WolframAlpha result is not relevant to the query:
- If Wolfram provides multiple 'Assumptions' for a query, choose the more relevant one(s) without explaining the initial result. If you are unsure, ask the user to choose.
- Re-send the exact same 'input' with NO modifications, and add the 'assumption' parameter, formatted as a list, with the relevant values.
- ONLY simplify or rephrase the initial query if a more relevant 'Assumption' or other input suggestions are not provided.
`.trim(),
    inputSchema: wolframalpha.AskWolframAlphaOptionsSchema
  })
  async ask(queryOrOptions: string | wolframalpha.AskWolframAlphaOptions) {
    const options =
      typeof queryOrOptions !== 'string'
        ? queryOrOptions
        : { input: queryOrOptions }

    const searchParams = { ...options }
    return this.ky.get('v1/llm-api', { searchParams }).text()
  }
}

Wyświetl plik

@ -0,0 +1,25 @@
import { Parser } from 'expr-eval'
import { z } from 'zod'
import { createAIFunction } from '../create-ai-function.js'
// TODO: consider using https://github.com/josdejong/mathjs
// TODO: ensure `expr` is sanitized to not run arbitrary code
/** Input schema of the `calculator` AI function: a single expression string. */
export const CalculatorInputSchema = z.object({
expr: z.string().describe('mathematical expression to evaluate')
})
/** Input object type inferred from `CalculatorInputSchema`. */
export type CalculatorInput = z.infer<typeof CalculatorInputSchema>
/**
 * AI function that evaluates a mathematical expression with the `expr-eval`
 * parser and resolves with the numeric result.
 */
export const calculator = createAIFunction(
  {
    name: 'calculator',
    description:
      'Computes the result of simple mathematical expressions. Handles basic arithmetic operations like addition, subtraction, multiplication, division, exponentiation, and common functions like sin, cos, abs, exp, and random.',
    inputSchema: CalculatorInputSchema
  },
  async ({ expr }: CalculatorInput): Promise<number> => Parser.evaluate(expr)
)

56
src/tools/e2b.ts 100644
Wyświetl plik

@ -0,0 +1,56 @@
import { CodeInterpreter, type ProcessMessage } from '@e2b/code-interpreter'
import { z } from 'zod'
import { createAIFunction } from '../create-ai-function.js'
import { getEnv } from '../utils.js'
/**
 * E2B Python code interpreter sandbox.
 *
 * Creates a fresh sandbox for every call (using the `E2B_API_KEY` environment
 * variable), executes the given code in a single notebook cell, and always
 * closes the sandbox afterwards. stdout/stderr messages are echoed to the
 * console while the cell runs.
 *
 * Resolves with the cell's results converted via `toJSON()`; throws an `Error`
 * carrying the execution error's `value` if the cell reports an error.
 *
 * @see https://e2b.dev
 */
export const e2b = createAIFunction(
  {
    name: 'execute_python',
    // This text is shown to the model, so it is kept free of typos.
    description: `
Execute python code in a Jupyter notebook cell and returns any result, stdout, stderr, display_data, and error.
- code has access to the internet and can make api requests
- code has access to the filesystem and can read/write files
- code can install any pip package (if it exists) if you need to, but the usual packages for data analysis are already preinstalled
- code uses python3
- code is executed in a secure sandbox environment, so you don't need to worry about safety
`.trim(),
    inputSchema: z.object({
      code: z
        .string()
        .describe('Python code to execute in a single notebook cell.')
    })
  },
  async ({ code }) => {
    const sandbox = await CodeInterpreter.create({
      apiKey: getEnv('E2B_API_KEY')
    })

    try {
      const exec = await sandbox.notebook.execCell(code, {
        onStderr: (msg: ProcessMessage) => {
          console.warn('[Code Interpreter stderr]', msg)
        },
        onStdout: (stdout: ProcessMessage) => {
          console.log('[Code Interpreter stdout]', stdout)
        }
      })

      if (exec.error) {
        console.error('[Code Interpreter error]', exec.error)
        throw new Error(exec.error.value)
      }

      return exec.results.map((result) => result.toJSON())
    } finally {
      // Release the sandbox whether execution succeeded or failed.
      await sandbox.close()
    }
  }
)

Wyświetl plik

@ -0,0 +1,138 @@
import pMap from 'p-map'
import { z } from 'zod'
import { aiFunction, AIFunctionsProvider } from '../fns.js'
import { type diffbot, DiffbotClient } from '../services/diffbot-client.js'
import { SerpAPIClient } from '../services/serpapi-client.js'
import { isValidCrawlableUrl, normalizeUrl } from '../url-utils.js'
import { omit, pick } from '../utils.js'
// TODO: allow `search` tool to support other search clients
// (e.g. Bing, Exa, Searxng, Serper, Tavily)
/**
 * AI function provider that runs a web search through `SerpAPIClient` and then
 * analyzes the resulting pages through `DiffbotClient`.
 */
export class SearchAndCrawl extends AIFunctionsProvider {
readonly serpapi: SerpAPIClient
readonly diffbot: DiffbotClient
/**
 * @param opts - Optional pre-built clients; a default `SerpAPIClient` /
 * `DiffbotClient` is constructed for each one that is not supplied.
 */
constructor(opts: { serpapi?: SerpAPIClient; diffbot?: DiffbotClient } = {}) {
super()
this.serpapi = opts.serpapi ?? new SerpAPIClient()
this.diffbot = opts.diffbot ?? new DiffbotClient()
}
/**
 * Searches for `query`, analyzes up to `numSearchResults` organic results,
 * and returns the search response with `organic_results` replaced by a
 * `scrape_results` array.
 *
 * @param query - Search query.
 * @param numSearchResults - Number of results requested from the search and
 * maximum number of organic results crawled (default 3).
 * @param maxCrawlDepth - Maximum depth to which `list` pages are followed
 * (default 1).
 * @param maxListItems - Maximum number of linked items followed per `list`
 * page (default 3).
 */
@aiFunction({
name: 'search_and_crawl',
description:
'Uses Google to search the web, crawls the results, and then summarizes the most relevant results. Useful for creating in-depth summaries of topics along with sources.',
inputSchema: z.object({
query: z.string().describe('search query')
})
})
async searchAndCrawl({
query,
numSearchResults = 3,
maxCrawlDepth = 1,
maxListItems = 3
}: {
query: string
numSearchResults?: number
maxCrawlDepth?: number
maxListItems?: number
}) {
// Raw and normalized forms of every URL already visited during this call.
const crawledUrls = new Set<string>()
// Analyzes one URL and, for `list` pages, recursively follows its items.
// Any error is logged and turned into an empty result.
const crawlAndScrape = async (
url: string | undefined,
{
depth = 0
}: {
depth?: number
}
): Promise<diffbot.ExtractAnalyzeResponse[]> => {
try {
// Skip missing, non-crawlable, or already-visited URLs.
if (!url) return []
if (!isValidCrawlableUrl(url)) return []
if (crawledUrls.has(url)) return []
const normalizedUrl = normalizeUrl(url)
if (!normalizedUrl) return []
if (crawledUrls.has(normalizedUrl)) return []
// Mark both forms as visited before the first await, so concurrently
// running crawls see this URL as taken.
crawledUrls.add(url)
crawledUrls.add(normalizedUrl)
console.log('\n\n')
const scrapeResult = await this.diffbot.analyzeUrl({ url })
console.log(
`SearchAndCrawl depth ${depth} - "${url}"`,
pick(scrapeResult, 'type', 'title')
)
// Only `list` pages are expanded, and only while below the depth limit.
if (scrapeResult.type !== 'list') {
return [scrapeResult]
}
if (depth >= maxCrawlDepth) {
return [scrapeResult]
}
// Only the first extracted object is inspected for linked items.
const object = scrapeResult.objects?.[0]
if (!object) return [scrapeResult]
const items = object.items
?.filter((item) => item.link)
.slice(0, maxListItems)
if (!items?.length) return [scrapeResult]
// When items are followed, only their results are returned; the list
// page's own result is not included.
const innerScrapeResults = (
await pMap(
items,
async (item) => {
const innerScrapeResult = await crawlAndScrape(item.link, {
depth: depth + 1
})
return innerScrapeResult
},
{
concurrency: 4
}
)
).flat()
return innerScrapeResults
} catch (err) {
console.warn('crawlAndScrape error', url, err)
return []
}
}
const searchResponse = await this.serpapi.search({
q: query,
num: numSearchResults
})
console.log(`SearchAndCrawl search results "${query}"`, searchResponse)
// Crawl the top organic results concurrently and flatten the per-URL arrays.
const scrapeResults = (
await pMap(
(searchResponse.organic_results || []).slice(0, numSearchResults),
async (searchResult) => {
return crawlAndScrape(searchResult.link, {
depth: 0
})
},
{
concurrency: 5
}
)
).flat()
const output = {
...omit(searchResponse, 'organic_results'),
scrape_results: scrapeResults
}
console.log(`SearchAndCrawl response for query "${query}"`, output)
return output
}
}

62
src/types.ts 100644
Wyświetl plik

@ -0,0 +1,62 @@
import type { Jsonifiable } from 'type-fest'
import type { z } from 'zod'
import type { AIFunctionSet } from './ai-function-set.js'
import type { AIFunctionsProvider } from './fns.js'
import type { Msg } from './message.js'
export type { Msg } from './message.js'
export type { KyInstance } from 'ky'
export type { ThrottledFunction } from 'p-throttle'
/** A value of type `T`, or `null`. */
export type Nullable<T> = T | null
/**
 * Recursively applies `DeepNullable` to every property of an object type;
 * non-object types become `Nullable<T>`.
 */
export type DeepNullable<T> = T extends object
? { [K in keyof T]: DeepNullable<T[K]> }
: Nullable<T>
/** A value of type `T`, or a promise of one. */
export type MaybePromise<T> = T | Promise<T>
/** `Jsonifiable`, or a record whose values are `Jsonifiable`. */
export type RelaxedJsonifiable = Jsonifiable | Record<string, Jsonifiable>
/** Name, description and parameters of a single function. */
export interface AIFunctionSpec {
name: string
description: string
parameters: Record<string, unknown>
}
/** Tool wrapper of type `'function'` around an `AIFunctionSpec`. */
export interface AIToolSpec {
type: 'function'
function: AIFunctionSpec
}
/** The implementation of the function, with arg parsing and validation. */
// NOTE(review): `Omit` is a mapped type — confirm the resulting type still
// carries the call signature of the function type it is applied to.
export type AIFunctionImpl<Return> = Omit<
(input: string | Msg) => MaybePromise<Return>,
'name' | 'toString' | 'arguments' | 'caller' | 'prototype' | 'length'
>
/** Anything that is, or can supply, AI functions: a provider, a single function, or a function set. */
export type AIFunctionLike = AIFunctionsProvider | AIFunction | AIFunctionSet
/**
* A function meant to be used with LLM function calling.
*/
export interface AIFunction<
InputSchema extends z.ZodObject<any> = z.ZodObject<any>,
Return = any
> {
// Call signature: invoked with a string or a `Msg`.
(input: string | Msg): MaybePromise<Return>
/** The Zod schema for the input object. */
inputSchema: InputSchema
/** Parse the function arguments from a message. */
parseInput(input: string | Msg): z.infer<InputSchema>
/** The function spec for the OpenAI API `functions` property. */
spec: AIFunctionSpec
/** The underlying function implementation without any arg parsing or validation. */
// TODO: this `any` shouldn't be necessary, but it is for `createAIFunction` results to be assignable to `AIFunctionLike`
impl: (params: z.infer<InputSchema> | any) => MaybePromise<Return>
}

Wyświetl plik

@ -0,0 +1,31 @@
import { describe, expect, test } from 'vitest'
import { normalizeUrl } from './url-utils.js'
// Table-driven checks for `normalizeUrl`: valid inputs map to their expected
// normalized form, invalid inputs map to `null`.
describe('normalizeUrl', () => {
  test('valid urls', async () => {
    const cases: Array<[input: string, expected: string]> = [
      ['https://www.google.com', 'https://www.google.com'],
      ['//www.google.com', 'https://www.google.com'],
      ['https://www.google.com/foo?', 'https://www.google.com/foo'],
      [
        'https://www.google.com/?foo=bar&dog=cat',
        'https://www.google.com/?dog=cat&foo=bar'
      ],
      ['https://google.com/abc/123//', 'https://google.com/abc/123']
    ]

    for (const [input, expected] of cases) {
      expect(normalizeUrl(input)).toBe(expected)
    }
  })

  test('invalid urls', async () => {
    const invalidInputs = [
      '/foo',
      '/foo/bar/baz',
      '://foo.com',
      'foo',
      '',
      undefined as unknown as string,
      null as unknown as string
    ]

    for (const input of invalidInputs) {
      expect(normalizeUrl(input)).toBe(null)
    }
  })
})

88
src/url-utils.ts 100644
Wyświetl plik

@ -0,0 +1,88 @@
import isRelativeUrlImpl from 'is-relative-url'
import normalizeUrlImpl, {
type Options as NormalizeUrlOptions
} from 'normalize-url'
import QuickLRU from 'quick-lru'
import { hashObject } from './utils.js'
// Protocols that `isValidCrawlableUrl` accepts.
const protocolAllowList = new Set(['https:', 'http:'])
// LRU cache of `normalizeUrl` results (a normalized URL, or `null` for invalid
// input), keyed by the input URL combined with a hash of the options.
const normalizedUrlCache = new QuickLRU<string, string | null>({
maxSize: 4000
})
/**
 * Returns `true` when `url` is a non-empty, non-relative URL that parses with
 * `URL`, uses an allowed protocol (`http:` / `https:`), and can be normalized
 * by `normalizeUrl`. Any exception raised along the way yields `false`.
 */
export function isValidCrawlableUrl(url: string): boolean {
  try {
    if (!url || isRelativeUrl(url)) {
      return false
    }

    const { protocol } = new URL(url)

    // `&&` short-circuits, so normalization only runs for allowed protocols.
    return protocolAllowList.has(protocol) && Boolean(normalizeUrl(url))
  } catch {
    return false
  }
}
/**
 * Returns `true` when `url` is a non-empty string that `is-relative-url`
 * considers relative and that does not start with `//`.
 */
export function isRelativeUrl(url: string): boolean {
  if (typeof url !== 'string' || url === '') {
    return false
  }

  // Protocol-relative URLs (`//host/...`) are not treated as relative here.
  if (url.startsWith('//')) {
    return false
  }

  return isRelativeUrlImpl(url)
}
/**
 * Normalizes `url` with `normalize-url`, using this module's defaults merged
 * with the caller's `options`.
 *
 * Results (including `null` for URLs that fail to normalize) are memoized in
 * `normalizedUrlCache`, keyed by the URL and a hash of the effective options.
 *
 * @param url - The URL to normalize.
 * @param options - Optional `normalize-url` options overriding the defaults.
 * @returns The normalized URL, or `null` when `url` is empty, relative,
 * invalid, or normalizes to an empty string.
 */
export function normalizeUrl(
  url: string,
  options?: NormalizeUrlOptions
): string | null {
  let normalizedUrl: string | null | undefined

  if (!url || isRelativeUrl(url)) {
    return null
  }

  const opts = {
    stripWWW: false,
    defaultProtocol: 'https',
    normalizeProtocol: true,
    forceHttps: false,
    stripHash: false,
    stripTextFragment: true,
    removeQueryParameters: [/^utm_\w+/i, 'ref', 'ref_src'],
    removeTrailingSlash: true,
    removeSingleSlash: true,
    removeExplicitPort: true,
    sortQueryParameters: true,
    ...options
  } as Required<NormalizeUrlOptions>

  const optionsHash = hashObject(opts)
  const cacheKey = `${url}-${optionsHash}`

  try {
    // `undefined` means "not cached"; a cached `null` is a valid hit.
    normalizedUrl = normalizedUrlCache.get(cacheKey)
    if (normalizedUrl !== undefined) {
      return normalizedUrl
    }

    normalizedUrl = normalizeUrlImpl(url, opts)

    // Check the result, not the `normalizeUrl` function itself (which is
    // always truthy), so an empty result is reported as `null`.
    if (!normalizedUrl) {
      normalizedUrl = null
    }
  } catch {
    // ignore invalid urls
    normalizedUrl = null
  }

  normalizedUrlCache.set(cacheKey, normalizedUrl!)
  return normalizedUrl
}

106
src/utils.test.ts 100644
Wyświetl plik

@ -0,0 +1,106 @@
import ky from 'ky'
import pThrottle from 'p-throttle'
import { describe, expect, test } from 'vitest'
import { mockKyInstance } from './_utils.js'
import {
omit,
pick,
sanitizeSearchParams,
stringifyForModel,
throttleKy
} from './utils.js'
// `pick` keeps only the requested keys and ignores keys that are not present.
test('pick', () => {
  const flat = { a: 1, b: 2, c: 3 }
  expect(pick(flat, 'a', 'c')).toEqual({ a: 1, c: 3 })

  const nested = { a: { b: 'foo' }, d: -1, foo: null } as any
  expect(pick(nested, 'b', 'foo')).toEqual({ foo: null })
})
// `omit` drops the requested top-level keys and ignores keys that are not present.
test('omit', () => {
  const flat = { a: 1, b: 2, c: 3 }
  expect(omit(flat, 'a', 'c')).toEqual({ b: 2 })

  const nested = { a: { b: 'foo' }, d: -1, foo: null } as any
  expect(omit(nested, 'b', 'foo')).toEqual({ a: { b: 'foo' }, d: -1 })
})
// Snapshot checks for `sanitizeSearchParams`; the inputs are evaluated in a
// fixed order so each one lines up with its recorded snapshot.
test('sanitizeSearchParams', () => {
  const inputs: Parameters<typeof sanitizeSearchParams>[0][] = [
    { a: 1, b: undefined, c: 13 },
    { a: [1, 2, 3] },
    { b: ['a', 'b'], foo: true },
    { b: [false, true, false] },
    {
      flag: ['foo', 'bar', 'baz'],
      token: 'test'
    },
    {},
    { a: [] }
  ]

  for (const input of inputs) {
    expect(sanitizeSearchParams(input).toString()).toMatchSnapshot()
  }
})
// Issues 10 sequential requests through a ky instance throttled to one request
// per `interval` and checks that every request after the first takes at least
// roughly one interval to complete.
test(
'throttleKy should rate-limit requests to ky properly',
async () => {
const interval = 1000
const throttle = pThrottle({
limit: 1,
interval,
strict: true
})
const ky2 = mockKyInstance(throttleKy(ky, throttle))
const url = 'https://httpbin.org/get'
for (let i = 0; i < 10; i++) {
const before = Date.now()
const res = await ky2.get(url)
const after = Date.now()
const duration = after - before
// console.log(duration, res.status)
expect(res.status).toBe(200)
// leave a bit of wiggle room for the interval
// (the first request is not expected to be delayed)
if (i > 0) {
expect(duration >= interval - interval / 5).toBeTruthy()
}
}
},
{
// 10 requests at ~1s each need more than the default test timeout.
timeout: 60_000
}
)
// `stringifyForModel` matches `JSON.stringify` for plain objects and yields an
// empty string when called without input.
describe('stringifyForModel', () => {
  test('handles basic objects', () => {
    const sample = {
      foo: 'bar',
      nala: ['is', 'cute'],
      kittens: null,
      cats: undefined,
      paws: 4.3
    }

    expect(stringifyForModel(sample)).toEqual(JSON.stringify(sample, null))
  })

  test('handles empty input', () => {
    expect(stringifyForModel()).toEqual('')
  })
})

160
src/utils.ts 100644
Wyświetl plik

@ -0,0 +1,160 @@
import type { Jsonifiable } from 'type-fest'
import dedent from 'dedent'
import hashObjectImpl, { type Options as HashObjectOptions } from 'hash-object'
import type * as types from './types.js'
export { assert } from './assert.js'
export { default as delay } from 'delay'
/**
 * Builds a shallow copy of `inputObj` that leaves out the given `keys`.
 *
 * Only own enumerable string-keyed properties are copied.
 *
 * @example
 * ```js
 * omit({ a: 1, b: 2, c: 3 }, 'a', 'c') // { b: 2 }
 * ```
 */
export const omit = <
  T extends Record<any, unknown> | object,
  K extends keyof T = keyof T
>(
  inputObj: T,
  ...keys: K[]
): Omit<T, K> => {
  const excluded = new Set<unknown>(keys)
  const kept: [string, unknown][] = []

  for (const entry of Object.entries(inputObj)) {
    if (!excluded.has(entry[0])) {
      kept.push(entry)
    }
  }

  return Object.fromEntries(kept) as any
}
/**
 * From `inputObj`, create a new object that only includes `keys`.
 *
 * Only own enumerable string-keyed properties are considered (the semantics
 * of `Object.entries`), so symbol-keyed properties are never picked. Numeric
 * keys are matched by their string form, e.g. `pick({ 1: 'a' }, 1)`.
 *
 * @param inputObj - Source object; it is not mutated.
 * @param keys - Keys to keep in the returned object.
 * @returns A shallow copy of `inputObj` restricted to the given keys.
 *
 * @example
 * ```js
 * pick({ a: 1, b: 2, c: 3 }, 'a', 'c') // { a: 1, c: 3 }
 * ```
 */
export const pick = <
  T extends Record<any, unknown> | object,
  K extends keyof T = keyof T
>(
  inputObj: T,
  ...keys: K[]
): Pick<T, K> => {
  // `Object.entries` always yields string keys, so numeric keys must be
  // normalized to strings or they would never match. Symbols are skipped
  // because `Object.entries` never returns symbol-keyed properties.
  const includedKeys = new Set<string>()
  for (const key of keys) {
    if (typeof key !== 'symbol') {
      includedKeys.add(String(key))
    }
  }

  return Object.fromEntries(
    Object.entries(inputObj).filter(([k]) => includedKeys.has(k))
  ) as any
}
/**
 * Returns a shallow copy of `obj` without the entries whose value is
 * `undefined`. `null` values are kept.
 */
export function pruneUndefined<T extends Record<string, any>>(
  obj: T
): NonNullable<{ [K in keyof T]: Exclude<T[K], undefined> }> {
  const definedEntries = Object.entries(obj).filter(
    (entry) => entry[1] !== undefined
  )

  return Object.fromEntries(definedEntries) as NonNullable<T>
}
/**
 * Returns a shallow copy of `obj` without the entries whose value is either
 * `null` or `undefined`. Other falsy values (`0`, `''`, `false`) are kept.
 */
export function pruneNullOrUndefined<T extends Record<string, any>>(
  obj: T
): NonNullable<{ [K in keyof T]: Exclude<T[K], undefined | null> }> {
  const presentEntries = Object.entries(obj).filter((entry) => {
    const value = entry[1]
    return !(value === undefined || value === null)
  })

  return Object.fromEntries(presentEntries) as NonNullable<T>
}
/**
 * Reads the environment variable `name` from `process.env`.
 *
 * Returns `undefined` when there is no global `process`, when the variable
 * is not set, or when accessing the environment throws.
 */
export function getEnv(name: string): string | undefined {
  try {
    if (typeof process === 'undefined') {
      return undefined
    }

    // eslint-disable-next-line no-process-env
    return process.env?.[name]
  } catch {
    return undefined
  }
}
/**
 * No-op function: accepts nothing, does nothing, and returns `undefined`.
 */
export const noop = (): undefined => {
  return undefined
}
/**
 * Returns a new ky instance, derived from `ky`, whose requests are throttled.
 *
 * The throttling is applied by registering a throttled no-op function as a
 * `beforeRequest` hook on the extended instance.
 *
 * Very useful for enforcing rate limits.
 *
 * @param ky - Base ky instance to extend.
 * @param throttleFn - Wraps a function into its throttled counterpart.
 */
export function throttleKy(
  ky: types.KyInstance,
  throttleFn: <Arguments extends readonly unknown[], ReturnValue>(
    function_: (...args_: Arguments) => ReturnValue
  ) => types.ThrottledFunction<(...args_: Arguments) => ReturnValue>
) {
  // The hook itself does no work; it only exists to be throttled.
  const throttledHook = throttleFn(noop)

  return ky.extend({
    hooks: {
      beforeRequest: [throttledHook]
    }
  })
}
/**
 * Creates a new `URLSearchParams` object with all values coerced to strings
 * that correctly handles arrays of values as repeated keys.
 *
 * Entries whose value is `undefined` or `null` are skipped entirely, so they
 * never end up serialized as the literal strings "undefined" or "null". An
 * empty array contributes no entries.
 *
 * @param searchParams - Map of parameter names to scalar or array values.
 * @returns The resulting `URLSearchParams`, with keys in the input's order.
 */
export function sanitizeSearchParams(
  searchParams: Record<
    string,
    | string
    | number
    | boolean
    | string[]
    | number[]
    | boolean[]
    | null
    | undefined
  >
): URLSearchParams {
  const pairs: [string, string][] = []

  for (const [key, value] of Object.entries(searchParams)) {
    // Missing values are omitted rather than stringified.
    if (value === undefined || value === null) {
      continue
    }

    if (Array.isArray(value)) {
      // Arrays become repeated keys: { a: [1, 2] } => a=1&a=2
      for (const item of value) {
        pairs.push([key, String(item)])
      }
    } else {
      pairs.push([key, String(value)])
    }
  }

  return new URLSearchParams(pairs)
}
/**
 * Stringifies a JSON value in a way that's optimized for use with LLM prompts.
 *
 * - `undefined` (or no argument) yields an empty string.
 * - Strings are returned unchanged, without surrounding quotes.
 * - Everything else is serialized as compact JSON with no extra whitespace.
 */
export function stringifyForModel(jsonObject?: Jsonifiable): string {
  switch (typeof jsonObject) {
    case 'undefined':
      return ''

    case 'string':
      return jsonObject

    default:
      return JSON.stringify(jsonObject, null, 0)
  }
}
// Shared `dedent` instance configured with `escapeSpecialCharacters: true`.
const dedenter = dedent.withOptions({ escapeSpecialCharacters: true })

/**
 * Cleans a string by running it through `dedent` and then trimming leading
 * and trailing whitespace.
 *
 * @see: https://github.com/dmnd/dedent
 */
export function cleanStringForModel(text: string): string {
  const dedented = dedenter(text)
  return dedented.trim()
}
/**
 * Hashes `object` via `hash-object`, using `sha256` as the algorithm unless
 * `options` specifies a different one.
 *
 * @param object - Object to hash.
 * @param options - Optional `hash-object` options; they override the defaults.
 */
export function hashObject(
  object: Record<string, any>,
  options?: HashObjectOptions
): string {
  // Caller-provided options are spread last so they take precedence.
  const mergedOptions: HashObjectOptions = { algorithm: 'sha256', ...options }

  return hashObjectImpl(object, mergedOptions)
}

Wyświetl plik

@ -0,0 +1,63 @@
import { describe, expect, it } from 'vitest'
import { z } from 'zod'
import { zodToJsonSchema } from './zod-to-json-schema.js'
// Unit tests for the local `zodToJsonSchema` wrapper.
describe('zodToJsonSchema', () => {
  it('handles basic objects', () => {
    const personSchema = z.object({
      name: z.string().min(1).describe('Name of the person'),
      age: z.number().int().optional().describe('Age in years')
    })

    // `age` is optional, so only `name` is listed as required.
    const expected = {
      additionalProperties: false,
      type: 'object',
      required: ['name'],
      properties: {
        name: {
          type: 'string',
          description: 'Name of the person',
          minLength: 1
        },
        age: {
          type: 'integer',
          description: 'Age in years'
        }
      }
    }

    expect(zodToJsonSchema(personSchema)).toEqual(expected)
  })

  it('handles enums and unions', () => {
    const personSchema = z.object({
      name: z.string().min(1).describe('Name of the person'),
      sexEnum: z.enum(['male', 'female']),
      sexUnion: z.union([z.literal('male'), z.literal('female')])
    })

    // Both the enum and the union of literals are expected to produce the
    // same string `enum` schema.
    const stringEnum = {
      type: 'string',
      enum: ['male', 'female']
    }
    const expected = {
      additionalProperties: false,
      type: 'object',
      required: ['name', 'sexEnum', 'sexUnion'],
      properties: {
        name: {
          type: 'string',
          description: 'Name of the person',
          minLength: 1
        },
        sexEnum: stringEnum,
        sexUnion: stringEnum
      }
    }

    expect(zodToJsonSchema(personSchema)).toEqual(expected)
  })
})

Wyświetl plik

@ -0,0 +1,16 @@
import type { z } from 'zod'
import { zodToJsonSchema as zodToJsonSchemaImpl } from 'zod-to-json-schema'
import { omit } from './utils.js'
/**
 * Generate a JSON Schema from a Zod schema.
 *
 * The schema is generated with `$refStrategy: 'none'`, and the top-level
 * `$schema`, `default`, `definitions`, `description` and
 * `markdownDescription` keys are removed from the result.
 */
export function zodToJsonSchema(schema: z.ZodType): Record<string, unknown> {
  const jsonSchema = zodToJsonSchemaImpl(schema, { $refStrategy: 'none' })

  return omit(
    jsonSchema,
    '$schema',
    'default',
    'definitions',
    'description',
    'markdownDescription'
  )
}

32
tsconfig.json 100644
Wyświetl plik

@ -0,0 +1,32 @@
{
"compilerOptions": {
"target": "ES2020",
"lib": ["ESNext"],
"esModuleInterop": true,
"skipLibCheck": true,
"verbatimModuleSyntax": true,
"allowJs": true,
"resolveJsonModule": true,
"moduleDetection": "force",
"isolatedModules": true,
"useDefineForClassFields": true,
"jsx": "preserve",
// NOTE: these are deprecated
// "experimentalDecorators": true,
// "emitDecoratorMetadata": true,
"strict": true,
"noUncheckedIndexedAccess": true,
"forceConsistentCasingInFileNames": true,
"module": "NodeNext",
"moduleResolution": "NodeNext",
"outDir": "dist",
"sourceMap": true
},
"include": ["src"],
"ts-node": {
"transpileOnly": true
}
}

26
tsup.config.ts 100644
Wyświetl plik

@ -0,0 +1,26 @@
import { defineConfig } from 'tsup'
// Source files that each get their own build entry.
const entry = [
  'src/index.ts',
  'src/sdks/ai-sdk.ts',
  'src/sdks/dexter.ts',
  'src/sdks/genkit.ts',
  'src/sdks/langchain.ts',
  'src/sdks/llamaindex.ts',
  'src/services/twitter/index.ts',
  'src/tools/calculator.ts',
  'src/tools/e2b.ts'
]

// Single ESM build into `dist`, with sourcemaps and type declarations.
export default defineConfig([
  {
    entry,
    outDir: 'dist',
    target: 'node18',
    platform: 'node',
    format: ['esm'],
    splitting: false,
    sourcemap: true,
    minify: false,
    shims: true,
    dts: true
  }
])

7
vite.config.ts 100644
Wyświetl plik

@ -0,0 +1,7 @@
import { defineConfig } from 'vitest/config'
// Vitest configuration: sets the esbuild target to es2022.
const config = defineConfig({
  esbuild: {
    target: 'es2022'
  }
})

export default config