kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: record tool call usage for mcp edge requests; add graceful error handling to mcp edge tool call requests
rodzic
f92e448eb2
commit
afc8e49044
|
@ -37,6 +37,7 @@
|
||||||
"@agentic/platform-validators": "workspace:*",
|
"@agentic/platform-validators": "workspace:*",
|
||||||
"@hono/zod-validator": "catalog:",
|
"@hono/zod-validator": "catalog:",
|
||||||
"@modelcontextprotocol/sdk": "catalog:",
|
"@modelcontextprotocol/sdk": "catalog:",
|
||||||
|
"@sentry/cloudflare": "catalog:",
|
||||||
"agents": "^0.0.95",
|
"agents": "^0.0.95",
|
||||||
"fast-content-type-parse": "catalog:",
|
"fast-content-type-parse": "catalog:",
|
||||||
"hono": "catalog:",
|
"hono": "catalog:",
|
||||||
|
|
|
@ -4,7 +4,6 @@ import {
|
||||||
cors,
|
cors,
|
||||||
errorHandler,
|
errorHandler,
|
||||||
init,
|
init,
|
||||||
responseTime,
|
|
||||||
sentry
|
sentry
|
||||||
} from '@agentic/platform-hono'
|
} from '@agentic/platform-hono'
|
||||||
import { parseToolIdentifier } from '@agentic/platform-validators'
|
import { parseToolIdentifier } from '@agentic/platform-validators'
|
||||||
|
@ -13,7 +12,7 @@ import { Hono } from 'hono'
|
||||||
import type { GatewayHonoEnv } from './lib/types'
|
import type { GatewayHonoEnv } from './lib/types'
|
||||||
import { createAgenticClient } from './lib/agentic-client'
|
import { createAgenticClient } from './lib/agentic-client'
|
||||||
import { createHttpResponseFromMcpToolCallResponse } from './lib/create-http-response-from-mcp-tool-call-response'
|
import { createHttpResponseFromMcpToolCallResponse } from './lib/create-http-response-from-mcp-tool-call-response'
|
||||||
import { reportToolCallUsage } from './lib/report-tool-call-usage'
|
import { recordToolCallUsage } from './lib/record-tool-call-usage'
|
||||||
import { resolveHttpEdgeRequest } from './lib/resolve-http-edge-request'
|
import { resolveHttpEdgeRequest } from './lib/resolve-http-edge-request'
|
||||||
import { resolveMcpEdgeRequest } from './lib/resolve-mcp-edge-request'
|
import { resolveMcpEdgeRequest } from './lib/resolve-mcp-edge-request'
|
||||||
import { resolveOriginToolCall } from './lib/resolve-origin-tool-call'
|
import { resolveOriginToolCall } from './lib/resolve-origin-tool-call'
|
||||||
|
@ -46,8 +45,6 @@ app.use(init)
|
||||||
// Wrangler does this for us. TODO: Does this happen on prod?
|
// Wrangler does this for us. TODO: Does this happen on prod?
|
||||||
// app.use(accessLogger)
|
// app.use(accessLogger)
|
||||||
|
|
||||||
app.use(responseTime)
|
|
||||||
|
|
||||||
app.all(async (ctx) => {
|
app.all(async (ctx) => {
|
||||||
const gatewayStartTimeMs = Date.now()
|
const gatewayStartTimeMs = Date.now()
|
||||||
ctx.set('cache', caches.default)
|
ctx.set('cache', caches.default)
|
||||||
|
@ -80,8 +77,6 @@ app.all(async (ctx) => {
|
||||||
|
|
||||||
const resolvedHttpEdgeRequest = await resolveHttpEdgeRequest(ctx)
|
const resolvedHttpEdgeRequest = await resolveHttpEdgeRequest(ctx)
|
||||||
|
|
||||||
const originStartTimeMs = Date.now()
|
|
||||||
|
|
||||||
const resolvedOriginToolCallResult = await resolveOriginToolCall({
|
const resolvedOriginToolCallResult = await resolveOriginToolCall({
|
||||||
tool: resolvedHttpEdgeRequest.tool,
|
tool: resolvedHttpEdgeRequest.tool,
|
||||||
args: resolvedHttpEdgeRequest.toolCallArgs,
|
args: resolvedHttpEdgeRequest.toolCallArgs,
|
||||||
|
@ -117,22 +112,22 @@ app.all(async (ctx) => {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record the time it took for the origin to respond.
|
// Record the time it took for the origin to respond.
|
||||||
const now = Date.now()
|
res.headers.set(
|
||||||
const originTimespanMs = now - originStartTimeMs
|
'x-origin-response-time',
|
||||||
res.headers.set('x-origin-response-time', `${originTimespanMs}ms`)
|
`${resolvedOriginToolCallResult.originTimespanMs}ms`
|
||||||
|
)
|
||||||
|
|
||||||
const gatewayTimespanMs = now - gatewayStartTimeMs
|
// Record the time it took for the gateway to respond.
|
||||||
|
const gatewayTimespanMs = Date.now() - gatewayStartTimeMs
|
||||||
res.headers.set('x-response-time', `${gatewayTimespanMs}ms`)
|
res.headers.set('x-response-time', `${gatewayTimespanMs}ms`)
|
||||||
|
|
||||||
reportToolCallUsage({
|
recordToolCallUsage({
|
||||||
...resolvedHttpEdgeRequest,
|
...resolvedHttpEdgeRequest,
|
||||||
requestMode: 'http',
|
requestMode: 'http',
|
||||||
resolvedOriginToolCallResult,
|
resolvedOriginToolCallResult,
|
||||||
sessionId: ctx.get('sessionId')!,
|
sessionId: ctx.get('sessionId')!,
|
||||||
requestId: ctx.get('requestId')!,
|
requestId: ctx.get('requestId')!,
|
||||||
ip: ctx.get('ip'),
|
ip: ctx.get('ip'),
|
||||||
originTimespanMs,
|
|
||||||
gatewayTimespanMs,
|
|
||||||
env: ctx.env,
|
env: ctx.env,
|
||||||
waitUntil: ctx.executionCtx.waitUntil.bind(ctx.executionCtx)
|
waitUntil: ctx.executionCtx.waitUntil.bind(ctx.executionCtx)
|
||||||
})
|
})
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import { assert } from '@agentic/platform-core'
|
import { assert } from '@agentic/platform-core'
|
||||||
import { Client as McpClient } from '@modelcontextprotocol/sdk/client/index.js'
|
import { Client as McpClient } from '@modelcontextprotocol/sdk/client/index.js'
|
||||||
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'
|
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'
|
||||||
|
import * as Sentry from '@sentry/cloudflare'
|
||||||
import { DurableObject } from 'cloudflare:workers'
|
import { DurableObject } from 'cloudflare:workers'
|
||||||
|
|
||||||
|
import type { RawEnv } from './env'
|
||||||
import type { AgenticMcpRequestMetadata } from './types'
|
import type { AgenticMcpRequestMetadata } from './types'
|
||||||
|
|
||||||
export type DurableMcpClientInfo = {
|
export type DurableMcpClientInfo = {
|
||||||
|
@ -16,7 +18,7 @@ export type DurableMcpClientInfo = {
|
||||||
// customer<>DurableMcpClientInfo connection?
|
// customer<>DurableMcpClientInfo connection?
|
||||||
// Currently using `sessionId`
|
// Currently using `sessionId`
|
||||||
|
|
||||||
export class DurableMcpClient extends DurableObject {
|
export class DurableMcpClientBase extends DurableObject<RawEnv> {
|
||||||
protected client?: McpClient
|
protected client?: McpClient
|
||||||
protected clientConnectionP?: Promise<void>
|
protected clientConnectionP?: Promise<void>
|
||||||
|
|
||||||
|
@ -82,3 +84,12 @@ export class DurableMcpClient extends DurableObject {
|
||||||
return JSON.stringify(toolCallResponse)
|
return JSON.stringify(toolCallResponse)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * `DurableMcpClientBase` wrapped with Sentry's durable object instrumentation.
 *
 * The Sentry options are built from the env passed to the options callback:
 * the DSN comes from `SENTRY_DSN` and the environment from `ENVIRONMENT`.
 */
export const DurableMcpClient = Sentry.instrumentDurableObjectWithSentry(
  ({ SENTRY_DSN, ENVIRONMENT }: RawEnv) => {
    const integrations = [Sentry.extraErrorDataIntegration()]

    return {
      dsn: SENTRY_DSN,
      environment: ENVIRONMENT,
      integrations
    }
  },
  DurableMcpClientBase
)
|
||||||
|
|
|
@ -6,14 +6,21 @@ import {
|
||||||
CallToolRequestSchema,
|
CallToolRequestSchema,
|
||||||
ListToolsRequestSchema
|
ListToolsRequestSchema
|
||||||
} from '@modelcontextprotocol/sdk/types.js'
|
} from '@modelcontextprotocol/sdk/types.js'
|
||||||
|
import * as Sentry from '@sentry/cloudflare'
|
||||||
import { McpAgent } from 'agents/mcp'
|
import { McpAgent } from 'agents/mcp'
|
||||||
|
|
||||||
import type { RawEnv } from './env'
|
import type { RawEnv } from './env'
|
||||||
import type { AdminConsumer } from './types'
|
import type {
|
||||||
|
AdminConsumer,
|
||||||
|
McpToolCallResponse,
|
||||||
|
ResolvedOriginToolCallResult
|
||||||
|
} from './types'
|
||||||
|
import { handleMcpToolCallError } from './handle-mcp-tool-call-error'
|
||||||
|
import { recordToolCallUsage } from './record-tool-call-usage'
|
||||||
import { resolveOriginToolCall } from './resolve-origin-tool-call'
|
import { resolveOriginToolCall } from './resolve-origin-tool-call'
|
||||||
import { transformHttpResponseToMcpToolCallResponse } from './transform-http-response-to-mcp-tool-call-response'
|
import { transformHttpResponseToMcpToolCallResponse } from './transform-http-response-to-mcp-tool-call-response'
|
||||||
|
|
||||||
export class DurableMcpServer extends McpAgent<
|
export class DurableMcpServerBase extends McpAgent<
|
||||||
RawEnv,
|
RawEnv,
|
||||||
never, // TODO: do we need local state?
|
never, // TODO: do we need local state?
|
||||||
{
|
{
|
||||||
|
@ -26,6 +33,11 @@ export class DurableMcpServer extends McpAgent<
|
||||||
protected _serverP = Promise.withResolvers<Server>()
|
protected _serverP = Promise.withResolvers<Server>()
|
||||||
override server = this._serverP.promise
|
override server = this._serverP.promise
|
||||||
|
|
||||||
|
// NOTE: This empty constructor is required for the Sentry wrapper to work.
|
||||||
|
public constructor(state: DurableObjectState, env: RawEnv) {
|
||||||
|
super(state, env)
|
||||||
|
}
|
||||||
|
|
||||||
override async init() {
|
override async init() {
|
||||||
const { consumer, deployment, pricingPlan, ip } = this.props
|
const { consumer, deployment, pricingPlan, ip } = this.props
|
||||||
const { projectIdentifier } = parseDeploymentIdentifier(
|
const { projectIdentifier } = parseDeploymentIdentifier(
|
||||||
|
@ -73,22 +85,17 @@ export class DurableMcpServer extends McpAgent<
|
||||||
}))
|
}))
|
||||||
|
|
||||||
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
||||||
const { name, arguments: args } = request.params
|
const { name: toolName, arguments: args } = request.params
|
||||||
const tool = tools.find((tool) => tool.name === name)
|
const sessionId = this.ctx.id.toString()
|
||||||
|
const tool = tools.find((tool) => tool.name === toolName)
|
||||||
|
|
||||||
|
let resolvedOriginToolCallResult: ResolvedOriginToolCallResult | undefined
|
||||||
|
let toolCallResponse: McpToolCallResponse | undefined
|
||||||
|
|
||||||
try {
|
try {
|
||||||
assert(tool, 404, `Unknown tool "${name}"`)
|
assert(tool, 404, `Unknown tool "${toolName}"`)
|
||||||
|
|
||||||
// TODO: usage tracking / reporting
|
resolvedOriginToolCallResult = await resolveOriginToolCall({
|
||||||
|
|
||||||
const sessionId = this.ctx.id.toString()
|
|
||||||
const {
|
|
||||||
toolCallArgs,
|
|
||||||
originRequest,
|
|
||||||
originResponse,
|
|
||||||
toolCallResponse,
|
|
||||||
rateLimitResult
|
|
||||||
} = await resolveOriginToolCall({
|
|
||||||
tool,
|
tool,
|
||||||
args,
|
args,
|
||||||
deployment,
|
deployment,
|
||||||
|
@ -100,39 +107,72 @@ export class DurableMcpServer extends McpAgent<
|
||||||
waitUntil: this.ctx.waitUntil.bind(this.ctx)
|
waitUntil: this.ctx.waitUntil.bind(this.ctx)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
const {
|
||||||
|
originResponse,
|
||||||
|
toolCallResponse: resolvedToolCallResponse,
|
||||||
|
rateLimitResult
|
||||||
|
} = resolvedOriginToolCallResult
|
||||||
|
|
||||||
if (originResponse) {
|
if (originResponse) {
|
||||||
return transformHttpResponseToMcpToolCallResponse({
|
toolCallResponse = await transformHttpResponseToMcpToolCallResponse({
|
||||||
originRequest,
|
|
||||||
originResponse,
|
|
||||||
tool,
|
tool,
|
||||||
toolCallArgs,
|
...resolvedOriginToolCallResult
|
||||||
rateLimitResult
|
|
||||||
})
|
})
|
||||||
} else if (toolCallResponse) {
|
} else if (resolvedToolCallResponse) {
|
||||||
if (toolCallResponse._meta || rateLimitResult) {
|
if (resolvedToolCallResponse._meta || rateLimitResult) {
|
||||||
return {
|
toolCallResponse = {
|
||||||
...toolCallResponse,
|
...resolvedToolCallResponse,
|
||||||
_meta: {
|
_meta: {
|
||||||
...toolCallResponse._meta,
|
...resolvedToolCallResponse._meta,
|
||||||
...(rateLimitResult
|
...(rateLimitResult
|
||||||
? getRateLimitHeaders(rateLimitResult)
|
? getRateLimitHeaders(rateLimitResult)
|
||||||
: undefined)
|
: undefined)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return toolCallResponse
|
toolCallResponse = resolvedToolCallResponse
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(false, 500)
|
assert(false, 500)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(toolCallResponse, 500, 'Missing tool call response')
|
||||||
|
return toolCallResponse
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
// TODO: handle errors
|
// Gracefully handle tool call exceptions, whether they're thrown by the
|
||||||
// eslint-disable-next-line no-console
|
// origin or internally by the gateway.
|
||||||
console.error(err)
|
toolCallResponse = handleMcpToolCallError(err, {
|
||||||
throw err
|
deployment,
|
||||||
|
consumer,
|
||||||
|
toolName,
|
||||||
|
sessionId,
|
||||||
|
env: this.env
|
||||||
|
})
|
||||||
|
|
||||||
|
return toolCallResponse
|
||||||
} finally {
|
} finally {
|
||||||
// TODO: report usage
|
// Record tool call usage, whether the call was successful or not.
|
||||||
|
recordToolCallUsage({
|
||||||
|
...this.props,
|
||||||
|
requestMode: 'mcp',
|
||||||
|
tool,
|
||||||
|
resolvedOriginToolCallResult,
|
||||||
|
sessionId,
|
||||||
|
// TODO: requestId
|
||||||
|
ip,
|
||||||
|
env: this.env,
|
||||||
|
waitUntil: this.ctx.waitUntil.bind(this.ctx)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * `DurableMcpServerBase` wrapped with Sentry's durable object instrumentation.
 *
 * The Sentry options are built from the env passed to the options callback:
 * the DSN comes from `SENTRY_DSN` and the environment from `ENVIRONMENT`.
 */
export const DurableMcpServer = Sentry.instrumentDurableObjectWithSentry(
  ({ SENTRY_DSN, ENVIRONMENT }: RawEnv) => {
    const integrations = [Sentry.extraErrorDataIntegration()]

    return {
      dsn: SENTRY_DSN,
      environment: ENVIRONMENT,
      integrations
    }
  },
  DurableMcpServerBase
)
|
||||||
|
|
|
@ -10,9 +10,6 @@ import {
|
||||||
} from '@agentic/platform-hono'
|
} from '@agentic/platform-hono'
|
||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
|
|
||||||
import type { DurableMcpClient } from './durable-mcp-client'
|
|
||||||
import type { DurableRateLimiter } from './rate-limits/durable-rate-limiter'
|
|
||||||
|
|
||||||
export const envSchema = baseEnvSchema
|
export const envSchema = baseEnvSchema
|
||||||
.extend({
|
.extend({
|
||||||
AGENTIC_API_BASE_URL: z.string().url(),
|
AGENTIC_API_BASE_URL: z.string().url(),
|
||||||
|
@ -20,15 +17,15 @@ export const envSchema = baseEnvSchema
|
||||||
|
|
||||||
STRIPE_SECRET_KEY: z.string().nonempty(),
|
STRIPE_SECRET_KEY: z.string().nonempty(),
|
||||||
|
|
||||||
DO_RATE_LIMITER: z.custom<DurableObjectNamespace<DurableRateLimiter>>(
|
DO_RATE_LIMITER: z.custom<DurableObjectNamespace>((ns) =>
|
||||||
(ns) => isDurableObjectNamespace(ns)
|
isDurableObjectNamespace(ns)
|
||||||
),
|
),
|
||||||
|
|
||||||
DO_MCP_SERVER: z.custom<DurableObjectNamespace>((ns) =>
|
DO_MCP_SERVER: z.custom<DurableObjectNamespace>((ns) =>
|
||||||
isDurableObjectNamespace(ns)
|
isDurableObjectNamespace(ns)
|
||||||
),
|
),
|
||||||
|
|
||||||
DO_MCP_CLIENT: z.custom<DurableObjectNamespace<DurableMcpClient>>((ns) =>
|
DO_MCP_CLIENT: z.custom<DurableObjectNamespace>((ns) =>
|
||||||
isDurableObjectNamespace(ns)
|
isDurableObjectNamespace(ns)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
import type { AdminDeployment } from '@agentic/platform-types'
|
||||||
|
import type { ContentfulStatusCode } from 'hono/utils/http-status'
|
||||||
|
import { HttpError, pruneEmpty } from '@agentic/platform-core'
|
||||||
|
import * as Sentry from '@sentry/cloudflare'
|
||||||
|
import { HTTPException } from 'hono/http-exception'
|
||||||
|
import { HTTPError } from 'ky'
|
||||||
|
|
||||||
|
import type { RawEnv } from './env'
|
||||||
|
import type { AdminConsumer, McpToolCallResponse } from './types'
|
||||||
|
|
||||||
|
/**
 * Converts an exception thrown while handling an MCP tool call into an MCP
 * tool call error response (`isError: true`) so the caller can return it
 * instead of letting the exception propagate.
 *
 * The message and status are taken from the error when it is a known HTTP
 * error type (`HttpError`, hono's `HTTPException`, or ky's `HTTPError`). For
 * any other thrown value, the error's message is only exposed outside of
 * production; otherwise the generic "Internal Server Error" / 500 is used.
 *
 * @param err - The thrown value. This may be anything, including `null` or
 * `undefined`, since non-Error values can be thrown.
 * @param ctx - Request context. The ids and tool name are passed through
 * `pruneEmpty` and attached to the response's `_meta`; `env.ENVIRONMENT`
 * decides whether we're running in production.
 * @returns An MCP tool call response with `isError: true`, a single text
 * content item holding the message, and `_meta.status` set to the status.
 */
export function handleMcpToolCallError(
  err: any,
  {
    deployment,
    consumer,
    toolName,
    sessionId,
    requestId,
    env
  }: {
    deployment: AdminDeployment
    consumer?: AdminConsumer
    toolName: string
    sessionId: string
    requestId?: string
    env: RawEnv
  }
): McpToolCallResponse {
  let message = 'Internal Server Error'
  let status: ContentfulStatusCode = 500

  // NOTE: `content` is a placeholder here; it is finalized below once the
  // real message has been resolved.
  const res: McpToolCallResponse = {
    _meta: pruneEmpty({
      deploymentId: deployment.id,
      consumerId: consumer?.id,
      toolName,
      sessionId,
      requestId
    }),
    isError: true,
    content: [
      {
        type: 'text',
        text: message
      }
    ]
  }

  const isProd = env.ENVIRONMENT === 'production'

  if (err instanceof HttpError) {
    message = err.message
    status = err.statusCode as ContentfulStatusCode

    // This is where rate-limit headers will be set, since `RateLimitError`
    // is a subclass of `HttpError`.
    if (err.headers) {
      for (const [key, value] of Object.entries(err.headers)) {
        res._meta![key] = value
      }
    }
  } else if (err instanceof HTTPException) {
    message = err.message
    status = err.status
  } else if (err instanceof HTTPError) {
    message = err.message
    status = err.response.status as ContentfulStatusCode
  } else if (!isProd && typeof err?.message === 'string' && err.message) {
    // `err` may be `null`, `undefined`, or a non-Error value, so it must be
    // accessed defensively; this handler itself must never throw.
    message = err.message
  }

  if (!Number.isSafeInteger(status)) {
    status = 500
  }

  if (status >= 500) {
    // Treat every server-side failure (not just an exact 500) as an error
    // which is worth reporting.
    // eslint-disable-next-line no-console
    console.error(`mcp tool call "${toolName}" error`, status, err)

    if (isProd) {
      Sentry.captureException(err)
    }
  } else {
    // eslint-disable-next-line no-console
    console.warn(`mcp tool call "${toolName}" warning`, status, message, err)
  }

  // Set `status` after copying any error headers so that it always reflects
  // the resolved status.
  res._meta!.status = status
  res.content = [
    {
      type: 'text',
      text: message
    }
  ]

  return res
}
|
|
@ -1,13 +1,15 @@
|
||||||
import type { SetOptional } from 'type-fest'
|
import type { SetOptional } from 'type-fest'
|
||||||
|
import * as Sentry from '@sentry/cloudflare'
|
||||||
import { DurableObject } from 'cloudflare:workers'
|
import { DurableObject } from 'cloudflare:workers'
|
||||||
|
|
||||||
|
import type { RawEnv } from '../env'
|
||||||
import type { RateLimitState } from '../types'
|
import type { RateLimitState } from '../types'
|
||||||
|
|
||||||
const initialState: SetOptional<RateLimitState, 'resetTimeMs'> = {
|
const initialState: SetOptional<RateLimitState, 'resetTimeMs'> = {
|
||||||
current: 0
|
current: 0
|
||||||
}
|
}
|
||||||
|
|
||||||
export class DurableRateLimiter extends DurableObject {
|
export class DurableRateLimiterBase extends DurableObject<RawEnv> {
|
||||||
async update({
|
async update({
|
||||||
intervalMs,
|
intervalMs,
|
||||||
cost = 1
|
cost = 1
|
||||||
|
@ -44,3 +46,12 @@ export class DurableRateLimiter extends DurableObject {
|
||||||
await this.reset()
|
await this.reset()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * `DurableRateLimiterBase` wrapped with Sentry's durable object
 * instrumentation.
 *
 * The Sentry options are built from the env passed to the options callback:
 * the DSN comes from `SENTRY_DSN` and the environment from `ENVIRONMENT`.
 */
export const DurableRateLimiter = Sentry.instrumentDurableObjectWithSentry(
  ({ SENTRY_DSN, ENVIRONMENT }: RawEnv) => {
    const integrations = [Sentry.extraErrorDataIntegration()]

    return {
      dsn: SENTRY_DSN,
      environment: ENVIRONMENT,
      integrations
    }
  },
  DurableRateLimiterBase
)
|
||||||
|
|
|
@ -7,6 +7,7 @@ import type {
|
||||||
RateLimitState,
|
RateLimitState,
|
||||||
WaitUntil
|
WaitUntil
|
||||||
} from '../types'
|
} from '../types'
|
||||||
|
import type { DurableRateLimiterBase } from './durable-rate-limiter'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This map persists across worker executions and is used for caching active
|
* This map persists across worker executions and is used for caching active
|
||||||
|
@ -98,10 +99,12 @@ export async function enforceRateLimit({
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const did = env.DO_RATE_LIMITER.idFromName(id)
|
const durableRateLimiterId = env.DO_RATE_LIMITER.idFromName(id)
|
||||||
const obj = env.DO_RATE_LIMITER.get(did)
|
const durableRateLimiter = env.DO_RATE_LIMITER.get(
|
||||||
|
durableRateLimiterId
|
||||||
|
) as DurableObjectStub<DurableRateLimiterBase>
|
||||||
|
|
||||||
const updatedRateLimitStateP = obj.update({ cost, intervalMs })
|
const updatedRateLimitStateP = durableRateLimiter.update({ cost, intervalMs })
|
||||||
|
|
||||||
if (async) {
|
if (async) {
|
||||||
waitUntil(
|
waitUntil(
|
||||||
|
|
|
@ -29,48 +29,52 @@ import { createStripe } from './external/stripe'
|
||||||
*
|
*
|
||||||
* @see https://developers.cloudflare.com/analytics/analytics-engine/limits/
|
* @see https://developers.cloudflare.com/analytics/analytics-engine/limits/
|
||||||
*/
|
*/
|
||||||
export function reportToolCallUsage({
|
export function recordToolCallUsage({
|
||||||
requestMode,
|
requestMode,
|
||||||
tool,
|
|
||||||
deployment,
|
deployment,
|
||||||
consumer,
|
consumer,
|
||||||
|
tool,
|
||||||
resolvedOriginToolCallResult,
|
resolvedOriginToolCallResult,
|
||||||
ip,
|
ip,
|
||||||
sessionId,
|
sessionId,
|
||||||
originTimespanMs,
|
|
||||||
gatewayTimespanMs,
|
|
||||||
requestId,
|
requestId,
|
||||||
env,
|
env,
|
||||||
waitUntil
|
waitUntil
|
||||||
}: {
|
}: {
|
||||||
requestMode: RequestMode
|
requestMode: RequestMode
|
||||||
tool: Tool
|
|
||||||
deployment: AdminDeployment
|
deployment: AdminDeployment
|
||||||
consumer?: AdminConsumer
|
consumer?: AdminConsumer
|
||||||
pricingPlan?: PricingPlan
|
pricingPlan?: PricingPlan
|
||||||
resolvedOriginToolCallResult: ResolvedOriginToolCallResult
|
tool?: Tool
|
||||||
|
resolvedOriginToolCallResult?: ResolvedOriginToolCallResult
|
||||||
ip?: string
|
ip?: string
|
||||||
sessionId: string
|
sessionId: string
|
||||||
requestId: string
|
requestId?: string
|
||||||
originTimespanMs: number
|
|
||||||
gatewayTimespanMs: number
|
|
||||||
env: RawEnv
|
env: RawEnv
|
||||||
waitUntil: WaitUntil
|
waitUntil: WaitUntil
|
||||||
}): void {
|
}): void {
|
||||||
|
const { projectId } = deployment
|
||||||
const {
|
const {
|
||||||
rateLimitResult,
|
rateLimitResult,
|
||||||
cacheStatus,
|
cacheStatus,
|
||||||
originResponse,
|
originResponse,
|
||||||
|
originTimespanMs,
|
||||||
toolCallResponse,
|
toolCallResponse,
|
||||||
toolCallArgs,
|
toolCallArgs,
|
||||||
|
numRequestsCost,
|
||||||
reportUsage
|
reportUsage
|
||||||
} = resolvedOriginToolCallResult
|
} = resolvedOriginToolCallResult ?? {
|
||||||
const { projectId } = deployment
|
numRequestsCost: 0,
|
||||||
|
reportUsage: false
|
||||||
|
}
|
||||||
|
|
||||||
const requestSize = JSON.stringify(toolCallArgs).length
|
const requestSize = resolvedOriginToolCallResult
|
||||||
const responseSize =
|
? JSON.stringify(toolCallArgs).length
|
||||||
Number.parseInt(originResponse?.headers.get('content-length') ?? '0') ||
|
: 0
|
||||||
JSON.stringify(toolCallResponse).length
|
const responseSize = resolvedOriginToolCallResult
|
||||||
|
? Number.parseInt(originResponse?.headers.get('content-length') ?? '0') ||
|
||||||
|
JSON.stringify(toolCallResponse).length
|
||||||
|
: 0
|
||||||
|
|
||||||
// The string dimensions used for grouping and filtering (sometimes called
|
// The string dimensions used for grouping and filtering (sometimes called
|
||||||
// labels in other metrics systems).
|
// labels in other metrics systems).
|
||||||
|
@ -84,7 +88,7 @@ export function reportToolCallUsage({
|
||||||
deployment.id,
|
deployment.id,
|
||||||
|
|
||||||
// Name of the tool that was called
|
// Name of the tool that was called
|
||||||
tool.name,
|
tool?.name ?? null,
|
||||||
|
|
||||||
// Whether this request was made via MCP or HTTP
|
// Whether this request was made via MCP or HTTP
|
||||||
requestMode,
|
requestMode,
|
||||||
|
@ -102,24 +106,27 @@ export function reportToolCallUsage({
|
||||||
consumer?.stripeStatus ?? null,
|
consumer?.stripeStatus ?? null,
|
||||||
|
|
||||||
// Whether the request was rate-limited
|
// Whether the request was rate-limited
|
||||||
rateLimitResult?.passed ? 'rl-passed' : 'rl-exceeded',
|
resolvedOriginToolCallResult
|
||||||
|
? rateLimitResult?.passed
|
||||||
|
? 'rl-passed'
|
||||||
|
: 'rl-exceeded'
|
||||||
|
: null,
|
||||||
|
|
||||||
// Whether the request hit the cache
|
// Whether the request hit the cache
|
||||||
cacheStatus,
|
cacheStatus ?? null,
|
||||||
|
|
||||||
// Response status
|
// Response status
|
||||||
originResponse?.status?.toString() ||
|
resolvedOriginToolCallResult
|
||||||
(toolCallResponse ? (toolCallResponse.isError ? 'error' : '200') : null)
|
? originResponse?.status?.toString() ||
|
||||||
|
(toolCallResponse ? (toolCallResponse.isError ? 'error' : '200') : null)
|
||||||
|
: 'error'
|
||||||
]
|
]
|
||||||
|
|
||||||
// Numeric values to record in this data point.
|
// Numeric values to record in this data point.
|
||||||
// NOTE: It is important that the ordering of these fields remains consistent!
|
// NOTE: It is important that the ordering of these fields remains consistent!
|
||||||
const doubles = [
|
const doubles = [
|
||||||
// Origin timespan in milliseconds
|
// Origin timespan in milliseconds
|
||||||
originTimespanMs,
|
originTimespanMs ?? 0,
|
||||||
|
|
||||||
// Gateway timespan in milliseconds
|
|
||||||
gatewayTimespanMs,
|
|
||||||
|
|
||||||
// Request bandwidth in bytes
|
// Request bandwidth in bytes
|
||||||
requestSize,
|
requestSize,
|
||||||
|
@ -129,7 +136,10 @@ export function reportToolCallUsage({
|
||||||
|
|
||||||
// Total bandwidth in bytes
|
// Total bandwidth in bytes
|
||||||
// TODO: Correctly calculate total bandwidth using `content-length`
|
// TODO: Correctly calculate total bandwidth using `content-length`
|
||||||
requestSize + responseSize
|
requestSize + responseSize,
|
||||||
|
|
||||||
|
// Number of requests cost
|
||||||
|
numRequestsCost ?? 0
|
||||||
]
|
]
|
||||||
|
|
||||||
// Cloudflare Analytics Engine only supports writing a single index at a time,
|
// Cloudflare Analytics Engine only supports writing a single index at a time,
|
||||||
|
@ -162,7 +172,9 @@ export function reportToolCallUsage({
|
||||||
|
|
||||||
const pricingPlanLineItemSlug = 'requests'
|
const pricingPlanLineItemSlug = 'requests'
|
||||||
const eventName = `meter-${projectId}-${pricingPlanLineItemSlug}`
|
const eventName = `meter-${projectId}-${pricingPlanLineItemSlug}`
|
||||||
const identifier = `${requestId}:${consumer.id}:${tool.name}`
|
const identifier = requestId
|
||||||
|
? `${requestId}:${consumer.id}:${tool?.name || 'unknown-tool'}`
|
||||||
|
: undefined
|
||||||
|
|
||||||
// Report usage to Stripe asynchronously.
|
// Report usage to Stripe asynchronously.
|
||||||
waitUntil(
|
waitUntil(
|
||||||
|
@ -170,7 +182,7 @@ export function reportToolCallUsage({
|
||||||
event_name: eventName,
|
event_name: eventName,
|
||||||
identifier,
|
identifier,
|
||||||
payload: {
|
payload: {
|
||||||
value: '1',
|
value: numRequestsCost.toString(),
|
||||||
stripe_customer_id: consumer._stripeCustomerId
|
stripe_customer_id: consumer._stripeCustomerId
|
||||||
}
|
}
|
||||||
})
|
})
|
|
@ -4,9 +4,11 @@ import type {
|
||||||
RateLimit,
|
RateLimit,
|
||||||
Tool
|
Tool
|
||||||
} from '@agentic/platform-types'
|
} from '@agentic/platform-types'
|
||||||
|
import type { DurableObjectStub } from '@cloudflare/workers-types'
|
||||||
import { assert, RateLimitError } from '@agentic/platform-core'
|
import { assert, RateLimitError } from '@agentic/platform-core'
|
||||||
import { parseDeploymentIdentifier } from '@agentic/platform-validators'
|
import { parseDeploymentIdentifier } from '@agentic/platform-validators'
|
||||||
|
|
||||||
|
import type { DurableMcpClientBase } from './durable-mcp-client'
|
||||||
import type { RawEnv } from './env'
|
import type { RawEnv } from './env'
|
||||||
import type {
|
import type {
|
||||||
AdminConsumer,
|
AdminConsumer,
|
||||||
|
@ -54,6 +56,8 @@ export async function resolveOriginToolCall({
|
||||||
// be rate-limited / cached / tracked / etc.
|
// be rate-limited / cached / tracked / etc.
|
||||||
|
|
||||||
const { originAdapter } = deployment
|
const { originAdapter } = deployment
|
||||||
|
// TODO: make this configurable via `ToolConfig.cost`
|
||||||
|
const numRequestsCost = 1
|
||||||
let rateLimitResult: RateLimitResult | undefined
|
let rateLimitResult: RateLimitResult | undefined
|
||||||
let rateLimit: RateLimit | undefined | null
|
let rateLimit: RateLimit | undefined | null
|
||||||
let reportUsage = true
|
let reportUsage = true
|
||||||
|
@ -143,11 +147,14 @@ export async function resolveOriginToolCall({
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rateLimit) {
|
if (rateLimit) {
|
||||||
|
// TODO: Consider decrementing rate limit if the response is cached or
|
||||||
|
// errors? this doesn't seem too important, so will leave as-is for now.
|
||||||
rateLimitResult = await enforceRateLimit({
|
rateLimitResult = await enforceRateLimit({
|
||||||
id: consumer?.id ?? ip ?? sessionId,
|
id: consumer?.id ?? ip ?? sessionId,
|
||||||
interval: rateLimit.interval,
|
interval: rateLimit.interval,
|
||||||
maxPerInterval: rateLimit.maxPerInterval,
|
maxPerInterval: rateLimit.maxPerInterval,
|
||||||
async: rateLimit.async,
|
async: rateLimit.async,
|
||||||
|
cost: numRequestsCost,
|
||||||
env,
|
env,
|
||||||
waitUntil
|
waitUntil
|
||||||
})
|
})
|
||||||
|
@ -169,6 +176,8 @@ export async function resolveOriginToolCall({
|
||||||
strictAdditionalProperties: true
|
strictAdditionalProperties: true
|
||||||
})
|
})
|
||||||
|
|
||||||
|
const originStartTimeMs = Date.now()
|
||||||
|
|
||||||
if (originAdapter.type === 'openapi') {
|
if (originAdapter.type === 'openapi') {
|
||||||
const operation = originAdapter.toolToOperationMap[tool.name]
|
const operation = originAdapter.toolToOperationMap[tool.name]
|
||||||
assert(operation, 404, `Tool "${tool.name}" not found in OpenAPI spec`)
|
assert(operation, 404, `Tool "${tool.name}" not found in OpenAPI spec`)
|
||||||
|
@ -201,7 +210,9 @@ export async function resolveOriginToolCall({
|
||||||
rateLimitResult,
|
rateLimitResult,
|
||||||
toolCallArgs,
|
toolCallArgs,
|
||||||
originRequest,
|
originRequest,
|
||||||
originResponse
|
originResponse,
|
||||||
|
originTimespanMs: Date.now() - originStartTimeMs,
|
||||||
|
numRequestsCost
|
||||||
}
|
}
|
||||||
} else if (originAdapter.type === 'mcp') {
|
} else if (originAdapter.type === 'mcp') {
|
||||||
const { projectIdentifier } = parseDeploymentIdentifier(
|
const { projectIdentifier } = parseDeploymentIdentifier(
|
||||||
|
@ -210,7 +221,9 @@ export async function resolveOriginToolCall({
|
||||||
)
|
)
|
||||||
|
|
||||||
const id = env.DO_MCP_CLIENT.idFromName(sessionId)
|
const id = env.DO_MCP_CLIENT.idFromName(sessionId)
|
||||||
const originMcpClient = env.DO_MCP_CLIENT.get(id)
|
const originMcpClient = env.DO_MCP_CLIENT.get(
|
||||||
|
id
|
||||||
|
) as DurableObjectStub<DurableMcpClientBase>
|
||||||
|
|
||||||
await originMcpClient.init({
|
await originMcpClient.init({
|
||||||
url: deployment.originUrl,
|
url: deployment.originUrl,
|
||||||
|
@ -263,7 +276,9 @@ export async function resolveOriginToolCall({
|
||||||
reportUsage,
|
reportUsage,
|
||||||
rateLimitResult,
|
rateLimitResult,
|
||||||
toolCallArgs,
|
toolCallArgs,
|
||||||
toolCallResponse: (await response.json()) as McpToolCallResponse
|
toolCallResponse: (await response.json()) as McpToolCallResponse,
|
||||||
|
originTimespanMs: Date.now() - originStartTimeMs,
|
||||||
|
numRequestsCost
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -294,10 +309,16 @@ export async function resolveOriginToolCall({
|
||||||
reportUsage,
|
reportUsage,
|
||||||
rateLimitResult,
|
rateLimitResult,
|
||||||
toolCallArgs,
|
toolCallArgs,
|
||||||
toolCallResponse
|
toolCallResponse,
|
||||||
|
originTimespanMs: Date.now() - originStartTimeMs,
|
||||||
|
numRequestsCost
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(false, 500)
|
assert(
|
||||||
|
false,
|
||||||
|
500,
|
||||||
|
`Internal error: origin adapter type "${(originAdapter as any).type}"`
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,6 +67,8 @@ export type ResolvedOriginToolCallResult = {
|
||||||
rateLimitResult?: RateLimitResult
|
rateLimitResult?: RateLimitResult
|
||||||
cacheStatus: CacheStatus
|
cacheStatus: CacheStatus
|
||||||
reportUsage: boolean
|
reportUsage: boolean
|
||||||
|
originTimespanMs: number
|
||||||
|
numRequestsCost: number
|
||||||
} & (
|
} & (
|
||||||
| {
|
| {
|
||||||
originRequest: Request
|
originRequest: Request
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
// import { parseToolIdentifier } from '@agentic/platform-validators'
|
import * as Sentry from '@sentry/cloudflare'
|
||||||
|
|
||||||
import { app } from './app'
|
import { app } from './app'
|
||||||
// import { DurableMcpServer } from './lib/durable-mcp-server'
|
import { type Env, parseEnv, type RawEnv } from './lib/env'
|
||||||
import { type Env, parseEnv } from './lib/env'
|
|
||||||
|
|
||||||
// Export Durable Objects for cloudflare
|
// Export Durable Objects for cloudflare
|
||||||
export { DurableMcpClient } from './lib/durable-mcp-client'
|
export { DurableMcpClient } from './lib/durable-mcp-client'
|
||||||
|
@ -10,33 +9,40 @@ export { DurableMcpServer } from './lib/durable-mcp-server'
|
||||||
export { DurableRateLimiter } from './lib/rate-limits/durable-rate-limiter'
|
export { DurableRateLimiter } from './lib/rate-limits/durable-rate-limiter'
|
||||||
|
|
||||||
// Main worker entrypoint
|
// Main worker entrypoint
|
||||||
export default {
|
export default Sentry.withSentry(
|
||||||
async fetch(
|
(env: RawEnv) => ({
|
||||||
request: Request,
|
dsn: env.SENTRY_DSN,
|
||||||
env: Env,
|
environment: env.ENVIRONMENT,
|
||||||
ctx: ExecutionContext
|
integrations: [Sentry.extraErrorDataIntegration()]
|
||||||
): Promise<Response> {
|
}),
|
||||||
let parsedEnv: Env
|
{
|
||||||
|
async fetch(
|
||||||
|
request: Request,
|
||||||
|
env: Env,
|
||||||
|
ctx: ExecutionContext
|
||||||
|
): Promise<Response> {
|
||||||
|
let parsedEnv: Env
|
||||||
|
|
||||||
// Validate the environment
|
// Validate the environment
|
||||||
try {
|
try {
|
||||||
parsedEnv = parseEnv(env)
|
parsedEnv = parseEnv(env)
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
// eslint-disable-next-line no-console
|
// eslint-disable-next-line no-console
|
||||||
console.error('error api gateway invalid env:', err.message)
|
console.error('error api gateway invalid env:', err.message)
|
||||||
|
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({ error: 'Invalid api gateway environment' }),
|
JSON.stringify({ error: 'Invalid api gateway environment' }),
|
||||||
{
|
{
|
||||||
status: 500,
|
status: 500,
|
||||||
headers: {
|
headers: {
|
||||||
'content-type': 'application/json'
|
'content-type': 'application/json'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
)
|
||||||
)
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Handle the request with `hono`
|
// Handle the request with `hono`
|
||||||
return app.fetch(request, parsedEnv, ctx)
|
return app.fetch(request, parsedEnv, ctx)
|
||||||
}
|
}
|
||||||
} satisfies ExportedHandler<Env>
|
} satisfies ExportedHandler<Env>
|
||||||
|
)
|
||||||
|
|
|
@ -433,6 +433,9 @@ importers:
|
||||||
'@modelcontextprotocol/sdk':
|
'@modelcontextprotocol/sdk':
|
||||||
specifier: 'catalog:'
|
specifier: 'catalog:'
|
||||||
version: 1.12.1
|
version: 1.12.1
|
||||||
|
'@sentry/cloudflare':
|
||||||
|
specifier: 'catalog:'
|
||||||
|
version: 9.26.0(@cloudflare/workers-types@4.20250604.0)
|
||||||
agents:
|
agents:
|
||||||
specifier: ^0.0.95
|
specifier: ^0.0.95
|
||||||
version: 0.0.95(@cloudflare/workers-types@4.20250604.0)(react@19.1.0)
|
version: 0.0.95(@cloudflare/workers-types@4.20250604.0)(react@19.1.0)
|
||||||
|
|
|
@ -76,6 +76,7 @@
|
||||||
- signed requests
|
- signed requests
|
||||||
- add support for custom headers on responses
|
- add support for custom headers on responses
|
||||||
- add ability to only report stripe usage on non-cached requests
|
- add ability to only report stripe usage on non-cached requests
|
||||||
|
- add support for ToolConfig.cost defaulting to 1, to easily support tools which cost multiple "credits"
|
||||||
- `@agentic/platform-hono`
|
- `@agentic/platform-hono`
|
||||||
- fix sentry middleware
|
- fix sentry middleware
|
||||||
- https://github.com/honojs/middleware/blob/main/packages/sentry/src/index.ts
|
- https://github.com/honojs/middleware/blob/main/packages/sentry/src/index.ts
|
||||||
|
|
Ładowanie…
Reference in New Issue