feat: improve recording tool usage for http gateway requests

pull/715/head
Travis Fischer 2025-06-11 10:00:50 +07:00
rodzic 4614c2322e
commit add1a43cd7
6 zmienionych plików z 166 dodań i 100 usunięć

Wyświetl plik

@ -4,16 +4,20 @@ import {
cors, cors,
errorHandler, errorHandler,
init, init,
responseTime,
sentry sentry
} from '@agentic/platform-hono' } from '@agentic/platform-hono'
import { parseToolIdentifier } from '@agentic/platform-validators' import { parseToolIdentifier } from '@agentic/platform-validators'
import { Hono } from 'hono' import { Hono } from 'hono'
import type { GatewayHonoEnv } from './lib/types' import type { GatewayHonoEnv, ResolvedOriginToolCallResult } from './lib/types'
import { createAgenticClient } from './lib/agentic-client' import { createAgenticClient } from './lib/agentic-client'
import { createHttpResponseFromMcpToolCallResponse } from './lib/create-http-response-from-mcp-tool-call-response' import { createHttpResponseFromMcpToolCallResponse } from './lib/create-http-response-from-mcp-tool-call-response'
import { recordToolCallUsage } from './lib/record-tool-call-usage' import { recordToolCallUsage } from './lib/record-tool-call-usage'
import { resolveHttpEdgeRequest } from './lib/resolve-http-edge-request' import {
type ResolvedHttpEdgeRequest,
resolveHttpEdgeRequest
} from './lib/resolve-http-edge-request'
import { resolveMcpEdgeRequest } from './lib/resolve-mcp-edge-request' import { resolveMcpEdgeRequest } from './lib/resolve-mcp-edge-request'
import { resolveOriginToolCall } from './lib/resolve-origin-tool-call' import { resolveOriginToolCall } from './lib/resolve-origin-tool-call'
import { isRequestPubliclyCacheable } from './lib/utils' import { isRequestPubliclyCacheable } from './lib/utils'
@ -45,15 +49,17 @@ app.use(init)
// Wrangler does this for us. TODO: Does this happen on prod? // Wrangler does this for us. TODO: Does this happen on prod?
// app.use(accessLogger) // app.use(accessLogger)
app.use(responseTime)
app.all(async (ctx) => { app.all(async (ctx) => {
const gatewayStartTimeMs = Date.now() const waitUntil = ctx.executionCtx.waitUntil.bind(ctx.executionCtx)
ctx.set('cache', caches.default) ctx.set('cache', caches.default)
ctx.set( ctx.set(
'client', 'client',
createAgenticClient({ createAgenticClient({
env: ctx.env, env: ctx.env,
cache: caches.default, cache: caches.default,
waitUntil: ctx.executionCtx.waitUntil.bind(ctx.executionCtx), waitUntil,
isCachingEnabled: isRequestPubliclyCacheable(ctx.req.raw) isCachingEnabled: isRequestPubliclyCacheable(ctx.req.raw)
}) })
) )
@ -75,73 +81,83 @@ app.all(async (ctx) => {
}).fetch(ctx.req.raw, ctx.env, executionCtx) }).fetch(ctx.req.raw, ctx.env, executionCtx)
} }
const resolvedHttpEdgeRequest = await resolveHttpEdgeRequest(ctx) let resolvedHttpEdgeRequest: ResolvedHttpEdgeRequest | undefined
let resolvedOriginToolCallResult: ResolvedOriginToolCallResult | undefined
const resolvedOriginToolCallResult = await resolveOriginToolCall({
tool: resolvedHttpEdgeRequest.tool,
args: resolvedHttpEdgeRequest.toolCallArgs,
deployment: resolvedHttpEdgeRequest.deployment,
consumer: resolvedHttpEdgeRequest.consumer,
pricingPlan: resolvedHttpEdgeRequest.pricingPlan,
cacheControl: resolvedHttpEdgeRequest.cacheControl,
sessionId: ctx.get('sessionId')!,
ip: ctx.get('ip'),
env: ctx.env,
waitUntil: ctx.executionCtx.waitUntil.bind(ctx.executionCtx)
})
let originResponse: Response | undefined let originResponse: Response | undefined
if (resolvedOriginToolCallResult.originResponse) { let res: Response | undefined
originResponse = resolvedOriginToolCallResult.originResponse
} else { function updateResponse(response: Response) {
originResponse = await createHttpResponseFromMcpToolCallResponse(ctx, { const res = new Response(response.body, response)
tool: resolvedHttpEdgeRequest.tool,
deployment: resolvedHttpEdgeRequest.deployment, if (resolvedOriginToolCallResult) {
toolCallResponse: resolvedOriginToolCallResult.toolCallResponse if (resolvedOriginToolCallResult.rateLimitResult) {
}) applyRateLimitHeaders({
res,
rateLimitResult: resolvedOriginToolCallResult.rateLimitResult
})
}
// Record the time it took for the origin to respond.
res.headers.set(
'x-origin-response-time',
`${resolvedOriginToolCallResult.originTimespanMs}ms`
)
}
// Reset server to Agentic because Cloudflare likes to override things
res.headers.set('server', 'agentic')
// Remove extra Cloudflare headers
res.headers.delete('x-powered-by')
res.headers.delete('via')
res.headers.delete('nel')
res.headers.delete('report-to')
res.headers.delete('server-timing')
res.headers.delete('reporting-endpoints')
return res
} }
assert(originResponse, 500, 'Origin response is required') try {
const res = new Response(originResponse.body, originResponse) resolvedHttpEdgeRequest = await resolveHttpEdgeRequest(ctx)
if (resolvedOriginToolCallResult.rateLimitResult) { resolvedOriginToolCallResult = await resolveOriginToolCall({
applyRateLimitHeaders({ ...resolvedHttpEdgeRequest,
res, args: resolvedHttpEdgeRequest.toolCallArgs,
rateLimitResult: resolvedOriginToolCallResult.rateLimitResult sessionId: ctx.get('sessionId')!,
ip: ctx.get('ip'),
env: ctx.env,
waitUntil
}) })
if (resolvedOriginToolCallResult.originResponse) {
originResponse = resolvedOriginToolCallResult.originResponse
} else {
originResponse = await createHttpResponseFromMcpToolCallResponse(ctx, {
...resolvedHttpEdgeRequest,
toolCallResponse: resolvedOriginToolCallResult.toolCallResponse
})
}
assert(originResponse, 500, 'Origin response is required')
res = updateResponse(originResponse)
return res
} catch (err: any) {
res = updateResponse(errorHandler(err, ctx))
return res
} finally {
if (resolvedHttpEdgeRequest && res) {
recordToolCallUsage({
...resolvedHttpEdgeRequest,
requestMode: 'http',
httpResponse: res,
resolvedOriginToolCallResult,
sessionId: ctx.get('sessionId')!,
requestId: ctx.get('requestId')!,
ip: ctx.get('ip'),
env: ctx.env,
waitUntil
})
}
} }
// Record the time it took for the origin to respond.
res.headers.set(
'x-origin-response-time',
`${resolvedOriginToolCallResult.originTimespanMs}ms`
)
// Record the time it took for the gateway to respond.
const gatewayTimespanMs = Date.now() - gatewayStartTimeMs
res.headers.set('x-response-time', `${gatewayTimespanMs}ms`)
recordToolCallUsage({
...resolvedHttpEdgeRequest,
requestMode: 'http',
resolvedOriginToolCallResult,
sessionId: ctx.get('sessionId')!,
requestId: ctx.get('requestId')!,
ip: ctx.get('ip'),
env: ctx.env,
waitUntil: ctx.executionCtx.waitUntil.bind(ctx.executionCtx)
})
// Reset server to Agentic because Cloudflare likes to override things
res.headers.set('server', 'agentic')
// Remove extra Cloudflare headers
res.headers.delete('x-powered-by')
res.headers.delete('via')
res.headers.delete('nel')
res.headers.delete('report-to')
res.headers.delete('server-timing')
res.headers.delete('reporting-endpoints')
return res
}) })

Wyświetl plik

@ -1,5 +1,5 @@
import type { AdminDeployment, PricingPlan } from '@agentic/platform-types' import type { AdminDeployment, PricingPlan } from '@agentic/platform-types'
import { assert, getRateLimitHeaders } from '@agentic/platform-core' import { assert, getRateLimitHeaders, pruneEmpty } from '@agentic/platform-core'
import { parseDeploymentIdentifier } from '@agentic/platform-validators' import { parseDeploymentIdentifier } from '@agentic/platform-validators'
import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { Server } from '@modelcontextprotocol/sdk/server/index.js'
import { import {
@ -124,9 +124,11 @@ export class DurableMcpServerBase extends McpAgent<
...resolvedToolCallResponse, ...resolvedToolCallResponse,
_meta: { _meta: {
...resolvedToolCallResponse._meta, ...resolvedToolCallResponse._meta,
...(rateLimitResult ...pruneEmpty({
? getRateLimitHeaders(rateLimitResult) headers: rateLimitResult
: undefined) ? getRateLimitHeaders(rateLimitResult)
: undefined
})
} }
} }
} else { } else {
@ -156,6 +158,7 @@ export class DurableMcpServerBase extends McpAgent<
...this.props, ...this.props,
requestMode: 'mcp', requestMode: 'mcp',
tool, tool,
mcpToolCallResponse: toolCallResponse!,
resolvedOriginToolCallResult, resolvedOriginToolCallResult,
sessionId, sessionId,
// TODO: requestId // TODO: requestId

Wyświetl plik

@ -1,13 +1,27 @@
import type { AdminDeployment } from '@agentic/platform-types' import type { AdminDeployment } from '@agentic/platform-types'
import type { ContentfulStatusCode } from 'hono/utils/http-status' import type { ContentfulStatusCode } from 'hono/utils/http-status'
import { HttpError, pruneEmpty } from '@agentic/platform-core' import {
getRateLimitHeaders,
HttpError,
pruneEmpty
} from '@agentic/platform-core'
import * as Sentry from '@sentry/cloudflare' import * as Sentry from '@sentry/cloudflare'
import { HTTPException } from 'hono/http-exception' import { HTTPException } from 'hono/http-exception'
import { HTTPError } from 'ky' import { HTTPError } from 'ky'
import type { RawEnv } from './env' import type { RawEnv } from './env'
import type { AdminConsumer, McpToolCallResponse } from './types' import type {
AdminConsumer,
McpToolCallResponse,
RateLimitResult
} from './types'
/**
* Turns a thrown error into an MCP error tool call response, and attempts to
* capture as much context as possible for potential debugging.
*
* @note This function is synchronous and must never throw.
*/
export function handleMcpToolCallError( export function handleMcpToolCallError(
err: any, err: any,
{ {
@ -16,6 +30,7 @@ export function handleMcpToolCallError(
toolName, toolName,
sessionId, sessionId,
requestId, requestId,
rateLimitResult,
env env
}: { }: {
deployment: AdminDeployment deployment: AdminDeployment
@ -23,9 +38,11 @@ export function handleMcpToolCallError(
toolName: string toolName: string
sessionId: string sessionId: string
requestId?: string requestId?: string
rateLimitResult?: RateLimitResult
env: RawEnv env: RawEnv
} }
): McpToolCallResponse { ): McpToolCallResponse {
const isProd = env.ENVIRONMENT === 'production'
let message = 'Internal Server Error' let message = 'Internal Server Error'
let status: ContentfulStatusCode = 500 let status: ContentfulStatusCode = 500
@ -35,7 +52,10 @@ export function handleMcpToolCallError(
consumerId: consumer?.id, consumerId: consumer?.id,
toolName, toolName,
sessionId, sessionId,
requestId requestId,
headers: rateLimitResult
? getRateLimitHeaders(rateLimitResult)
: undefined
}), }),
isError: true, isError: true,
content: [ content: [
@ -46,8 +66,6 @@ export function handleMcpToolCallError(
] ]
} }
const isProd = env.ENVIRONMENT === 'production'
if (err instanceof HttpError) { if (err instanceof HttpError) {
message = err.message message = err.message
status = err.statusCode as ContentfulStatusCode status = err.statusCode as ContentfulStatusCode
@ -78,7 +96,12 @@ export function handleMcpToolCallError(
console.error(`mcp tool call "${toolName}" error`, status, err) console.error(`mcp tool call "${toolName}" error`, status, err)
if (isProd) { if (isProd) {
Sentry.captureException(err) try {
Sentry.captureException(err)
} catch (err_) {
// eslint-disable-next-line no-console
console.error('Error Sentry.captureException failed', err, err_)
}
} }
} else { } else {
// eslint-disable-next-line no-console // eslint-disable-next-line no-console

Wyświetl plik

@ -7,6 +7,7 @@ import type {
import type { RawEnv } from './env' import type { RawEnv } from './env'
import type { import type {
AdminConsumer, AdminConsumer,
McpToolCallResponse,
RequestMode, RequestMode,
ResolvedOriginToolCallResult, ResolvedOriginToolCallResult,
WaitUntil WaitUntil
@ -35,6 +36,8 @@ export function recordToolCallUsage({
consumer, consumer,
tool, tool,
resolvedOriginToolCallResult, resolvedOriginToolCallResult,
httpResponse,
mcpToolCallResponse,
ip, ip,
sessionId, sessionId,
requestId, requestId,
@ -47,19 +50,32 @@ export function recordToolCallUsage({
pricingPlan?: PricingPlan pricingPlan?: PricingPlan
tool?: Tool tool?: Tool
resolvedOriginToolCallResult?: ResolvedOriginToolCallResult resolvedOriginToolCallResult?: ResolvedOriginToolCallResult
httpResponse?: Response
mcpToolCallResponse?: McpToolCallResponse
ip?: string ip?: string
sessionId: string sessionId: string
requestId?: string requestId?: string
env: RawEnv env: RawEnv
waitUntil: WaitUntil waitUntil: WaitUntil
}): void { } & (
| {
// For http requests, an http response is required.
requestMode: 'http'
httpResponse: Response
mcpToolCallResponse?: never
}
| {
// For mcp cool call requests, an mcp tool call response is required.
requestMode: 'mcp'
httpResponse?: never
mcpToolCallResponse: McpToolCallResponse
}
)): void {
const { projectId } = deployment const { projectId } = deployment
const { const {
rateLimitResult, rateLimitResult,
cacheStatus, cacheStatus,
originResponse,
originTimespanMs, originTimespanMs,
toolCallResponse,
toolCallArgs, toolCallArgs,
numRequestsCost, numRequestsCost,
reportUsage reportUsage
@ -67,18 +83,16 @@ export function recordToolCallUsage({
numRequestsCost: 0, numRequestsCost: 0,
reportUsage: false reportUsage: false
} }
mcpToolCallResponse ??= resolvedOriginToolCallResult?.toolCallResponse
const requestSize = resolvedOriginToolCallResult const requestSize = toolCallArgs ? JSON.stringify(toolCallArgs).length : 0
? JSON.stringify(toolCallArgs).length const responseSize =
: 0 Number.parseInt(httpResponse?.headers.get('content-length') ?? '0') ||
const responseSize = resolvedOriginToolCallResult (mcpToolCallResponse ? JSON.stringify(mcpToolCallResponse).length : 0)
? Number.parseInt(originResponse?.headers.get('content-length') ?? '0') ||
JSON.stringify(toolCallResponse).length
: 0
// The string dimensions used for grouping and filtering (sometimes called // The string dimensions used for grouping and filtering (sometimes called
// labels in other metrics systems). // labels in other metrics systems).
// NOTE: It is important that the ordering of these fields remains consistent! // NOTE: The ordering of these fields is important and must remain consistent!
// Max of 20 blobs with total size <= 5120 bytes. // Max of 20 blobs with total size <= 5120 bytes.
const blobs = [ const blobs = [
// Project ID of the request // Project ID of the request
@ -106,24 +120,27 @@ export function recordToolCallUsage({
consumer?.stripeStatus ?? null, consumer?.stripeStatus ?? null,
// Whether the request was rate-limited // Whether the request was rate-limited
resolvedOriginToolCallResult rateLimitResult
? rateLimitResult?.passed ? rateLimitResult.passed
? 'rl-passed' ? 'rl-passed'
: 'rl-exceeded' : 'rl-exceeded'
: null, : mcpToolCallResponse?._meta?.status === 429
? 'rl-exceeded'
: null,
// Whether the request hit the cache // Whether the request hit the cache
cacheStatus ?? null, cacheStatus ?? null,
// Response status // HTTP response status
resolvedOriginToolCallResult httpResponse?.status?.toString() ||
? originResponse?.status?.toString() || (mcpToolCallResponse
(toolCallResponse ? (toolCallResponse.isError ? 'error' : '200') : null) ? mcpToolCallResponse._meta?.status?.toString() ||
: 'error' (mcpToolCallResponse?.isError ? 'error' : '200')
: 'error')
] ]
// Numberic values to record in this data point. // Numberic values to record in this data point.
// NOTE: It is important that the ordering of these fields remains consistent! // NOTE: The ordering of these fields is important and must remain consistent!
const doubles = [ const doubles = [
// Origin timespan in milliseconds // Origin timespan in milliseconds
originTimespanMs ?? 0, originTimespanMs ?? 0,

Wyświetl plik

@ -15,6 +15,8 @@ import {
/** /**
* Hono error handler that sanitizes all types of internal, http, json-rpc, and * Hono error handler that sanitizes all types of internal, http, json-rpc, and
* unexpected errors and responds with an appropate HTTP Response. * unexpected errors and responds with an appropate HTTP Response.
*
* @note This function is synchronous and must never throw.
*/ */
export function errorHandler( export function errorHandler(
err: Error | HTTPResponseError, err: Error | HTTPResponseError,
@ -61,7 +63,12 @@ export function errorHandler(
logger.error(status, err) logger.error(status, err)
if (isProd) { if (isProd) {
captureException(err) try {
captureException(err)
} catch (err_) {
// eslint-disable-next-line no-console
console.error('Error Sentry.captureException failed', err, err_)
}
} }
} else { } else {
logger.warn(status, message, err) logger.warn(status, message, err)

Wyświetl plik

@ -29,15 +29,15 @@
- raw - raw
- auth - auth
- custom auth pages for `openauth` - custom auth pages for `openauth`
- API gateway - **API gateway**
- **usage tracking and reporting** - **usage tracking and reporting**
- oauth flow - oauth flow
- https://docs.scalekit.com/guides/mcp/oauth - https://docs.scalekit.com/guides/mcp/oauth
- openapi-kitchen-sink - openapi-kitchen-sink
- mcp-kitchen-sink - mcp-kitchen-sink
- how to handle binary bodies and responses? - how to handle binary bodies and responses?
- `recordToolCallUsage` in `finally` block of http flow
- improve logger vs console for non-hono path and util methods - improve logger vs console for non-hono path and util methods
- extra `Sentry` instrumentation (`setUser`, `captureMessage`, etc)
- **Public MCP server interface** - **Public MCP server interface**
- how does oauth work with this flow? - how does oauth work with this flow?
- proper error handling support within this flow; will currently get generic errors - proper error handling support within this flow; will currently get generic errors