diff --git a/apps/e2e/src/__snapshots__/http-e2e.test.ts.snap b/apps/e2e/src/__snapshots__/http-e2e.test.ts.snap index 3cd4795e..57617db2 100644 --- a/apps/e2e/src/__snapshots__/http-e2e.test.ts.snap +++ b/apps/e2e/src/__snapshots__/http-e2e.test.ts.snap @@ -250,6 +250,8 @@ exports[`HTTP => OpenAPI origin everything "echo" tool with empty body > 9.0: PO exports[`HTTP => OpenAPI origin everything "echo" tool with empty body > 9.1: POST @dev/test-everything-openapi/echo 1`] = `{}`; +exports[`HTTP => OpenAPI origin everything "echo_headers" tool > 12.0: GET @dev/test-everything-openapi@e738c8aa/custom_rate_limit_tool 1`] = `{}`; + exports[`HTTP => OpenAPI origin everything "pure" tool > 7.0: POST @dev/test-everything-openapi/pure 1`] = ` { "foo": "bar", diff --git a/apps/e2e/src/http-e2e.test.ts b/apps/e2e/src/http-e2e.test.ts index 65a2990e..46c1140b 100644 --- a/apps/e2e/src/http-e2e.test.ts +++ b/apps/e2e/src/http-e2e.test.ts @@ -16,7 +16,13 @@ const ky = defaultKy.extend({ }) for (const [i, fixtureSuite] of fixtureSuites.entries()) { - const { title, fixtures, compareResponseBodies = false } = fixtureSuite + const { + title, + fixtures, + compareResponseBodies = false, + repeat = 1, + repeatSuccessCriteria = 'all' + } = fixtureSuite const describeFn = fixtureSuite.only ? describe.only : describe describeFn(title, () => { @@ -56,74 +62,116 @@ for (const [i, fixtureSuite] of fixtureSuites.entries()) { }, // eslint-disable-next-line no-loop-func async () => { - const res = await ky(fixture.path, { - timeout, - ...fixture.request - }) + const numIterations = repeat ?? 1 + let numRepeatSuccesses = 0 - if (res.status !== status && res.status >= 500) { - let body: any - try { - body = await res.json() - } catch {} + for (let iteration = 0; iteration < numIterations; ++iteration) { + const repeatIterationPrefix = repeat + ? `[${iteration}/${numIterations}] ` + : '' - console.error(`${fixtureName} => UNEXPECTED ERROR ${res.status}`, { - body + const res = await ky(fixture.path, { + timeout, + ...fixture.request }) - } - expect(res.status).toBe(status) + if (res.status !== status && res.status >= 500) { + let body: any + try { + body = await res.json() + } catch {} - const { type } = contentType.safeParse( - res.headers.get('content-type') ?? '' - ) - expect(type).toBe(expectedContentType) - - let body: any - - if (type.includes('json')) { - try { - body = await res.json() - } catch (err) { - console.error('json error', err) - throw err + console.error( + `${repeatIterationPrefix}${fixtureName} => UNEXPECTED ERROR ${res.status}`, + { + body + } + ) } - } else if (type.includes('text')) { - body = await res.text() - } else { - body = await res.arrayBuffer() - } - if (debugFixture) { - console.log(`${fixtureName} => ${res.status}`, { - body, - headers: Object.fromEntries(res.headers.entries()) - }) - } + if (repeat) { + if (res.status === status) { + ++numRepeatSuccesses + } else { + if (debugFixture) { + console.log( + `${repeatIterationPrefix}${fixtureName} => ${res.status} (invalid; expected ${status})`, + { + headers: Object.fromEntries(res.headers.entries()) + } + ) + } - if (expectedBody) { - expect(body).toEqual(expectedBody) - } - - if (validate) { - await Promise.resolve(validate(body)) - } - - if (snapshot) { - expect(body).toMatchSnapshot() - } - - if (expectedHeaders) { - for (const [key, value] of Object.entries(expectedHeaders)) { - expect(res.headers.get(key)).toBe(value) - } - } - - if (compareResponseBodies && status >= 200 && status < 300) { - if (!fixtureResponseBody) { - fixtureResponseBody = body + continue + } } else { - expect(body).toEqual(fixtureResponseBody) + expect(res.status).toBe(status) + } + + const { type } = contentType.safeParse( + res.headers.get('content-type') ?? '' + ) + expect(type).toBe(expectedContentType) + + let body: any + + if (type.includes('json')) { + try { + body = await res.json() + } catch (err) { + console.error('json error', err) + throw err + } + } else if (type.includes('text')) { + body = await res.text() + } else { + body = await res.arrayBuffer() + } + + if (debugFixture) { + console.log( + `${repeatIterationPrefix}${fixtureName} => ${res.status}`, + { + body, + headers: Object.fromEntries(res.headers.entries()) + } + ) + } + + if (expectedBody) { + expect(body).toEqual(expectedBody) + } + + if (validate) { + await Promise.resolve(validate(body)) + } + + if (snapshot) { + expect(body).toMatchSnapshot() + } + + if (expectedHeaders) { + for (const [key, value] of Object.entries(expectedHeaders)) { + expect(res.headers.get(key)).toBe(value) + } + } + + if (compareResponseBodies && status >= 200 && status < 300) { + if (!fixtureResponseBody) { + fixtureResponseBody = body + } else { + expect(body).toEqual(fixtureResponseBody) + } + } + } + + if (repeat) { + if (repeatSuccessCriteria === 'all') { + expect(numRepeatSuccesses).toBe(numIterations) + } else if (repeatSuccessCriteria === 'some') { + expect(numRepeatSuccesses).toBeGreaterThan(0) + } else if (typeof repeatSuccessCriteria === 'function') { + expect(repeatSuccessCriteria(numRepeatSuccesses)).toBe(true) } } } diff --git a/apps/e2e/src/http-fixtures.ts b/apps/e2e/src/http-fixtures.ts index 6f94da0b..8986a3d2 100644 --- a/apps/e2e/src/http-fixtures.ts +++ b/apps/e2e/src/http-fixtures.ts @@ -52,6 +52,15 @@ export type E2ETestFixtureSuite = { /** @default undefined */ snapshot?: boolean + + /** @default undefined */ + repeat?: number + + /** @default 'all' */ + repeatSuccessCriteria?: + | 'all' + | 'some' + | ((numRepeatSuccesses: number) => boolean) } const now = Date.now() @@ -653,5 +662,23 @@ export const fixtureSuites: E2ETestFixtureSuite[] = [ } } ] + }, + { + title: 'HTTP => OpenAPI origin everything "custom_rate_limit_tool"', + only: true, + repeat: 1, + repeatSuccessCriteria: (numRepeatSuccesses) => numRepeatSuccesses <= 2, + fixtures: [ + { + path: '@dev/test-everything-openapi/custom_rate_limit_tool', + response: { + status: 429, + headers: { + 'ratelimit-policy': '2;w=30', + 'ratelimit-limit': '2' + } + } + } + ] } ] diff --git a/apps/gateway/src/lib/handle-mcp-tool-call-error.ts b/apps/gateway/src/lib/handle-mcp-tool-call-error.ts index bda12ac4..5bd8f237 100644 --- a/apps/gateway/src/lib/handle-mcp-tool-call-error.ts +++ b/apps/gateway/src/lib/handle-mcp-tool-call-error.ts @@ -1,5 +1,6 @@ import type { ContentfulStatusCode } from 'hono/utils/http-status' import { HttpError } from '@agentic/platform-core' +import { suppressedHttpStatuses } from '@agentic/platform-hono' import * as Sentry from '@sentry/cloudflare' import { HTTPException } from 'hono/http-exception' import { HTTPError } from 'ky' @@ -68,21 +69,23 @@ export function handleMcpToolCallError( status = 500 } - if (status === 500) { - // eslint-disable-next-line no-console - console.error(`mcp tool call "${toolName}" error`, status, err) + if (!suppressedHttpStatuses.has(status)) { + if (status >= 500) { + // eslint-disable-next-line no-console + console.error(`mcp tool call "${toolName}" error`, status, err) - if (isProd) { - try { - Sentry.captureException(err) - } catch (err_) { - // eslint-disable-next-line no-console - console.error('Error Sentry.captureException failed', err, err_) + if (isProd) { + try { + Sentry.captureException(err) + } catch (err_) { + // eslint-disable-next-line no-console + console.error('Error Sentry.captureException failed', err, err_) + } } + } else { + // eslint-disable-next-line no-console + console.warn(`mcp tool call "${toolName}" warning`, status, err) } - } else { - // eslint-disable-next-line no-console - console.warn(`mcp tool call "${toolName}" warning`, status, err) } ;(res._meta!.agentic as any).status = status diff --git a/apps/gateway/src/lib/rate-limits/durable-rate-limiter.ts b/apps/gateway/src/lib/rate-limits/durable-rate-limiter.ts index 8712e8ff..ac957dea 100644 --- a/apps/gateway/src/lib/rate-limits/durable-rate-limiter.ts +++ b/apps/gateway/src/lib/rate-limits/durable-rate-limiter.ts @@ -30,15 +30,24 @@ export class DurableRateLimiterBase extends DurableObject { // Update the alarm const currentAlarm = await this.ctx.storage.getAlarm() - if (currentAlarm == null) { + if (!currentAlarm) { await this.ctx.storage.setAlarm(resetTimeMs) } await this.ctx.storage.put('value', state) + + // const updatedState = await this.ctx.storage.get('value') + // console.log('update', this.ctx.id, { + // existingState, + // state, + // updatedState + // }) + return state } async reset() { + // console.log('reset rate-limit', this.ctx.id) await this.ctx.storage.put('value', initialState) } diff --git a/apps/gateway/src/lib/rate-limits/enforce-rate-limit.ts b/apps/gateway/src/lib/rate-limits/enforce-rate-limit.ts index 6a12ec65..c103a3bf 100644 --- a/apps/gateway/src/lib/rate-limits/enforce-rate-limit.ts +++ b/apps/gateway/src/lib/rate-limits/enforce-rate-limit.ts @@ -21,7 +21,7 @@ export async function enforceRateLimit({ interval, limit, cost = 1, - async = true, + async: _async = true, env, cache = globalRateLimitCache, waitUntil @@ -61,20 +61,20 @@ export async function enforceRateLimit({ }): Promise { assert(id, 400, 'Unauthenticated requests must have a valid IP address') + const async = false + const intervalMs = interval * 1000 const now = Date.now() - let rateLimitState: RateLimitState = cache.get(id) ?? { + const initialRateLimitState = cache.get(id) ?? { current: 0, resetTimeMs: now + intervalMs } + let rateLimitState = initialRateLimitState function updateCache(info: RateLimitState) { - const current = cache.get(id)?.current ?? 0 - if (current && info.current > current) { - cache.set(id, info) - rateLimitState = info - } + cache.set(id, info) + rateLimitState = info } /** @@ -124,6 +124,13 @@ export async function enforceRateLimit({ updateCache(updatedRateLimitState) } + // console.log('rateLimit', { + // id, + // initial: initialRateLimitState, + // current: rateLimitState, + // cost + // }) + return { id, passed: rateLimitState.current <= limit, diff --git a/packages/fixtures/valid/everything-openapi/agentic.config.ts b/packages/fixtures/valid/everything-openapi/agentic.config.ts index 9b001e5e..40158626 100644 --- a/packages/fixtures/valid/everything-openapi/agentic.config.ts +++ b/packages/fixtures/valid/everything-openapi/agentic.config.ts @@ -59,7 +59,8 @@ export default defineConfig({ name: 'custom_rate_limit_tool', rateLimit: { interval: '30s', - limit: 2 + limit: 2, + mode: 'strict' } }, { diff --git a/packages/fixtures/valid/everything-openapi/src/routes/custom-rate-limit-approximate-tool.ts b/packages/fixtures/valid/everything-openapi/src/routes/custom-rate-limit-approximate-tool.ts new file mode 100644 index 00000000..508d57a1 --- /dev/null +++ b/packages/fixtures/valid/everything-openapi/src/routes/custom-rate-limit-approximate-tool.ts @@ -0,0 +1,33 @@ +import { createRoute, type OpenAPIHono, z } from '@hono/zod-openapi' + +const route = createRoute({ + description: 'Custom rate limit tool (approximate mode)', + operationId: 'customRateLimitApproximateTool', + method: 'post', + path: '/custom-rate-limit-approximate-tool', + request: { + body: { + content: { + 'application/json': { + schema: z.object({}).passthrough() + } + } + } + }, + responses: { + 200: { + description: 'Echoed request body', + content: { + 'application/json': { + schema: z.object({}).passthrough() + } + } + } + } +}) + +export function registerCustomRateLimitApproximateTool(app: OpenAPIHono) { + return app.openapi(route, async (c) => { + return c.json(c.req.valid('json')) + }) +} diff --git a/packages/fixtures/valid/everything-openapi/src/routes/custom-rate-limit-tool.ts b/packages/fixtures/valid/everything-openapi/src/routes/custom-rate-limit-tool.ts index 59b08bbc..f7040394 100644 --- a/packages/fixtures/valid/everything-openapi/src/routes/custom-rate-limit-tool.ts +++ b/packages/fixtures/valid/everything-openapi/src/routes/custom-rate-limit-tool.ts @@ -1,7 +1,7 @@ import { createRoute, type OpenAPIHono, z } from '@hono/zod-openapi' const route = createRoute({ - description: 'Custom rate limit tool', + description: 'Custom rate limit tool (strict mode)', operationId: 'customRateLimitTool', method: 'post', path: '/custom-rate-limit-tool', diff --git a/packages/fixtures/valid/everything-openapi/src/server.ts b/packages/fixtures/valid/everything-openapi/src/server.ts index da1cb627..39bd607b 100644 --- a/packages/fixtures/valid/everything-openapi/src/server.ts +++ b/packages/fixtures/valid/everything-openapi/src/server.ts @@ -4,6 +4,7 @@ import { logger as honoLogger } from 'hono/logger' import { initExitHooks } from './exit-hooks' import { registerCustomCacheControlTool } from './routes/custom-cache-control-tool' +import { registerCustomRateLimitApproximateTool } from './routes/custom-rate-limit-approximate-tool' import { registerCustomRateLimitTool } from './routes/custom-rate-limit-tool' import { registerDisabledForFreePlanTool } from './routes/disabled-for-free-plan-tool' import { registerDisabledRateLimitTool } from './routes/disabled-rate-limit-tool' @@ -31,6 +32,7 @@ registerEchoHeaders(app) registerPure(app) registerUnpureMarkedPure(app) registerCustomCacheControlTool(app) +registerCustomRateLimitApproximateTool(app) registerNoStoreCacheControlTool(app) registerNoCacheCacheControlTool(app) registerCustomRateLimitTool(app) diff --git a/packages/hono/src/error-handler.ts b/packages/hono/src/error-handler.ts index 499cf599..0b6946e7 100644 --- a/packages/hono/src/error-handler.ts +++ b/packages/hono/src/error-handler.ts @@ -12,6 +12,10 @@ import { JsonRpcErrorCodes } from './json-rpc-errors' +// Don't log 429 errors because they may happen frequently and are just noise. +// Our access-logger should still log the 429 result, just not the whole error. +export const suppressedHttpStatuses = new Set([429]) + /** * Hono error handler that sanitizes all types of internal, http, json-rpc, and * unexpected errors and responds with an appropate HTTP Response. @@ -59,19 +63,21 @@ export function errorHandler( status = 500 } - if (status >= 500) { - logger.error(status, err) + if (!suppressedHttpStatuses.has(status)) { + if (status >= 500) { + logger.error(status, err) - if (isProd) { - try { - captureException(err) - } catch (err_) { - // eslint-disable-next-line no-console - console.error('Error Sentry.captureException failed', err, err_) + if (isProd) { + try { + captureException(err) + } catch (err_) { + // eslint-disable-next-line no-console + console.error('Error Sentry.captureException failed', err, err_) + } } + } else { + logger.warn(status, err) } - } else { - logger.warn(status, err) } if (isJsonRpcRequest) { diff --git a/packages/types/src/pricing.ts b/packages/types/src/pricing.ts index 21869a85..57804c7b 100644 --- a/packages/types/src/pricing.ts +++ b/packages/types/src/pricing.ts @@ -639,10 +639,10 @@ export type StripeSubscriptionItemIdMap = z.infer< * per minute per customer. */ export const defaultRequestsRateLimit = { + enabled: true, interval: 60, limit: 1000, - async: true, - enabled: true + mode: 'approximate' } as const satisfies Readonly /** diff --git a/packages/types/src/rate-limit.test.ts b/packages/types/src/rate-limit.test.ts index cfe048da..af6474b7 100644 --- a/packages/types/src/rate-limit.test.ts +++ b/packages/types/src/rate-limit.test.ts @@ -25,7 +25,7 @@ test('rateLimitSchema valid', () => { rateLimitSchema.parse({ interval: '1 day', limit: 1000, - async: false + mode: 'strict' }) ).toMatchSnapshot() @@ -39,7 +39,7 @@ test('rateLimitSchema valid', () => { rateLimitSchema.parse({ interval: '10m', limit: 100, - async: false, + mode: 'strict', enabled: false }) ).toMatchSnapshot() @@ -79,28 +79,28 @@ test('RateLimit types', () => { expectTypeOf({ interval: 10, limit: 100, - async: false, + mode: 'approximate', enabled: true } as const).toExtend() expectTypeOf<{ interval: 10 limit: 100 - async: false + mode: 'strict' enabled: true }>().toExtend() expectTypeOf({ interval: '10s', limit: 100, - async: true, + mode: 'strict', enabled: true } as const).not.toExtend() expectTypeOf<{ interval: '10s' limit: 100 - async: false + mode: 'strict' }>().not.toExtend() expectTypeOf({ @@ -126,13 +126,13 @@ test('RateLimitInput types', () => { expectTypeOf({ interval: 10, limit: 100, - async: false + mode: 'strict' } as const).toExtend() expectTypeOf<{ interval: 10 limit: 100 - async: boolean + mode: 'approximate' }>().toExtend() expectTypeOf({ diff --git a/packages/types/src/rate-limit.ts b/packages/types/src/rate-limit.ts index b8ccfbaa..98a6b9c2 100644 --- a/packages/types/src/rate-limit.ts +++ b/packages/types/src/rate-limit.ts @@ -8,12 +8,20 @@ import parseIntervalAsMs from 'ms' // z.literal('all') // ]) +export const rateLimitModeSchema = z.union([ + z.literal('strict'), + z.literal('approximate') +]) + /** * Rate limit config for metered LineItems. */ export const rateLimitSchema = z .union([ z.object({ + /** + * Whether or not this rate limit is enabled. + */ enabled: z.literal(false) }), z.object({ @@ -77,26 +85,29 @@ export const rateLimitSchema = z .describe('Maximum number of operations per interval (unitless).'), /** - * Whether to enforce the rate limit synchronously or asynchronously. + * How to enforce the rate limit: * - * The default rate-limiting mode is asynchronous, which means that requests - * are allowed to proceed immediately, with the limit being enforced in the - * background. This is much faster than synchronous mode, but it is less - * consistent if precise adherence to rate-limits is required. + * - `strict` (more precise but slower) + * - `approximate` (the default; faster and asynchronous but less precise). * - * With synchronous mode, requests are blocked until the current limit has + * The default rate-limiting mode is `approximate`, which means that requests + * are allowed to proceed immediately, with the limit being enforced + * asynchronously in the background. This is much faster than synchronous + * mode, but it is less consistent if precise adherence to rate-limits is + * required. + * + * With `strict` mode, requests are blocked until the current limit has * been confirmed. The downside with this approach is that it introduces * more latency to every request by default. The advantage is that it is * more precise and consistent. * - * @default true + * @default "approximate" */ - async: z - .boolean() + mode: rateLimitModeSchema .optional() - .default(true) + .default('approximate') .describe( - 'Whether to enforce the rate limit synchronously (strict but slower) or asynchronously (approximate and faster, the default).' + 'How to enforce the rate limit: "strict" (more precise but slower) or "approximate" (the default; faster and asynchronous but less precise).' ), // TODO: Consider adding support for this in the future @@ -112,6 +123,11 @@ export const rateLimitSchema = z // */ // rateLimitBy: rateLimitBySchema.optional().default('customer'), + /** + * Whether or not this rate limit is enabled. + * + * @default true + */ enabled: z.boolean().optional().default(true) }) ])