feat: implement fallback (#121)

Kludex · web-flow · commit 8fea6f60ca00 · 2025-11-05T14:36:40.000+01:00
diff --git a/gateway/src/gateway.ts b/gateway/src/gateway.ts
@@ -54,33 +54,51 @@ export async function gateway(
   // sort providers on priority, highest first
   providerProxies.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0))
 
-  const providerProxy = providerProxies[0]
-  if (!providerProxy) {
+  if (providerProxies.length === 0) {
     return textResponse(403, 'Forbidden - Provider not supported by this API Key')
   }
 
   const otel = new OtelTrace(request, apiKeyInfo.otelSettings, options)
 
-  const ProxyCls = getProvider(providerProxy.providerId)
-
-  const dispatchSpan = otel.startSpan()
-  const proxy = new ProxyCls({
-    request,
-    gatewayOptions: options,
-    apiKeyInfo,
-    providerProxy,
-    restOfPath,
-    ctx,
-    middlewares: options.proxyMiddlewares,
-    otelSpan: dispatchSpan,
-  })
-
-  const result = await proxy.dispatch()
-
-  // This doesn't work on streaming because the `result` object is returned as soon as we create the streaming response.
-  if (!('responseStream' in result) && !('response' in result)) {
-    const [spanName, attributes, level] = genAiOtelAttributes(result, proxy)
-    dispatchSpan.end(spanName, attributes, { level })
+  // Typed as the resolved `dispatch()` result; `!` is safe because the non-empty check above guarantees one loop iteration.
+  let result!: Awaited<ReturnType<InstanceType<ReturnType<typeof getProvider>>['dispatch']>>
+
+  for (const providerProxy of providerProxies) {
+    const ProxyCls = getProvider(providerProxy.providerId)
+
+    const otelSpan = otel.startSpan()
+    const proxy = new ProxyCls({
+      // Since the body is consumed by the proxy, we need to clone the request.
+      request: request.clone(),
+      gatewayOptions: options,
+      apiKeyInfo,
+      providerProxy,
+      restOfPath,
+      ctx,
+      middlewares: options.proxyMiddlewares,
+      otelSpan,
+    })
+
+    result = await proxy.dispatch()
+
+    // Those result kinds end the `otelSpan` themselves; only end it here for the remaining ones.
+    if (!('responseStream' in result) && !('response' in result) && !('unexpectedStatus' in result)) {
+      const [spanName, attributes, level] = genAiOtelAttributes(result, proxy)
+      otelSpan.end(spanName, attributes, { level })
+    }
+
+    // Check if we should retry with the next provider.
+    if ('unexpectedStatus' in result && isRetryableError(result.unexpectedStatus)) {
+      logfire.info('Provider failed with retryable error, trying next provider', {
+        providerId: providerProxy.providerId,
+        status: result.unexpectedStatus,
+        routingGroup: providerProxy.routingGroup,
+      })
+      continue
+    }
+
+    // If it succeeds, or it's not a retryable error, we can break out of the loop.
+    break
   }
 
   let response: Response
@@ -129,7 +147,7 @@ export async function gateway(
     response = new Response(responseBody, { status: unexpectedStatus, headers: responseHeaders })
   }
 
-  // TODO(Marcelo): This needs a bit of refactoring. We need the `dispatchSpan` to be closed before we send the spans.
+  // TODO(Marcelo): This needs a bit of refactoring. We need the `otelSpan` to be closed before we send the spans.
   if (!('responseStream' in result)) {
     runAfter(ctx, 'otel.send', otel.send())
   }
@@ -231,3 +249,7 @@ function calculateExpirationTtl(ex: ExceededScope[]): number | undefined {
   d.setHours(23, 59, 59)
   return Math.floor((d.getTime() - now.getTime()) / 1000)
 }
+
+function isRetryableError(status: number): boolean {
+  return status === 429 || (status >= 500 && status <= 599)
+}
diff --git a/gateway/src/otel/attributes.ts b/gateway/src/otel/attributes.ts
@@ -1,14 +1,9 @@
-import type {
-  DefaultProviderProxy,
-  ProxyInvalidRequest,
-  ProxySuccess,
-  ProxyUnexpectedResponse,
-} from '../providers/default'
+import type { DefaultProviderProxy, ProxyInvalidRequest, ProxySuccess } from '../providers/default'
 import type { Attributes, Level } from '.'
 import type { InputMessages, OutputMessages, TextPart } from './genai'
 
 export function genAiOtelAttributes(
-  result: ProxySuccess | ProxyInvalidRequest | ProxyUnexpectedResponse,
+  result: ProxySuccess | ProxyInvalidRequest,
   provider: DefaultProviderProxy,
 ): [string, Attributes, Level] {
   const { requestModel } = result
@@ -39,21 +34,11 @@ export function genAiOtelAttributes(
       'gen_ai.usage.cache_audio_read_tokens': usage.cache_audio_read_tokens,
       'gen_ai.usage.output_audio_tokens': usage.output_audio_tokens,
     }
-  } else if ('error' in result) {
+  } else {
     const { error } = result
     spanName = `chat ${requestModel ?? 'unknown-model'}, invalid request {error}`
     attributes = { ...attributes, error }
     level = 'error'
-  } else {
-    const { unexpectedStatus, requestBody, responseBody } = result
-    spanName = `chat ${requestModel ?? 'unknown-model'}, unexpected response: {http.response.status_code}`
-    attributes = {
-      ...attributes,
-      'http.response.status_code': unexpectedStatus,
-      'http.request.body.text': requestBody,
-      'http.response.body.text': responseBody,
-    }
-    level = 'warn'
   }
   return [spanName, attributes, level]
 }
diff --git a/gateway/src/providers/default.ts b/gateway/src/providers/default.ts
@@ -312,6 +312,17 @@ export class DefaultProviderProxy {
     if (!response.ok) {
       // CAUTION: can we be charged in any way for failed requests?
       const responseBody = await response.text()
+      this.otelSpan.end(
+        `chat ${requestModel ?? 'unknown-model'}, unexpected response: {http.response.status_code}`,
+        {
+          ...attributesFromRequest(this.request),
+          ...attributesFromResponse(response),
+          'http.request.body.text': requestBodyText,
+          'http.response.body.text': responseBody,
+          'http.response.status_code': response.status,
+        },
+        { level: 'warn' },
+      )
       return {
         requestModel,
         requestBody: requestBodyText,
diff --git a/gateway/test/gateway.spec.ts b/gateway/test/gateway.spec.ts
@@ -268,3 +268,125 @@ describe('custom middleware', () => {
     expect(responses).lengthOf(1)
   })
 })
+
+describe('routing group fallback', () => {
+  test('should fallback to next provider on retryable error', async () => {
+    let attemptCount = 0
+    const providerAttempts: string[] = []
+
+    class FailFirstMiddleware implements Middleware {
+      dispatch(next: Next): Next {
+        return async (proxy: DefaultProviderProxy) => {
+          attemptCount++
+          const baseUrl = (proxy as unknown as { providerProxy: { baseUrl: string } }).providerProxy.baseUrl
+          providerAttempts.push(baseUrl)
+
+          // First provider should fail with 503
+          if (baseUrl.includes('provider1')) {
+            return {
+              requestModel: 'gpt-5',
+              requestBody: '{}',
+              unexpectedStatus: 503,
+              responseHeaders: new Headers(),
+              responseBody: JSON.stringify({ error: 'Service unavailable' }),
+            }
+          }
+
+          // Second provider should succeed
+          return await next(proxy)
+        }
+      }
+    }
+
+    const ctx = createExecutionContext()
+    const request = new Request<unknown, IncomingRequestCfProperties>('https://example.com/chat/gpt-5', {
+      method: 'POST',
+      headers: { Authorization: 'fallback-test', 'pydantic-ai-gateway-routing-group': 'test-group' },
+      body: JSON.stringify({ model: 'gpt-5', messages: [{ role: 'user', content: 'Hello' }] }),
+    })
+
+    const gatewayEnv = buildGatewayEnv(env, [], fetch, undefined, [new FailFirstMiddleware()])
+    const response = await gatewayFetch(request, new URL(request.url), ctx, gatewayEnv)
+    await waitOnExecutionContext(ctx)
+
+    expect(response.status).toBe(200)
+    expect(attemptCount).toBe(2)
+    expect(providerAttempts).toEqual(['http://test.example.com/provider1', 'http://test.example.com/provider2'])
+
+    // Verify the response came from the second provider
+    const content = (await response.json()) as { choices: [{ message: { content: string } }] }
+    expect(content.choices[0].message.content).toMatchInlineSnapshot(
+      `"request URL: http://test.example.com/provider2/gpt-5"`,
+    )
+  })
+
+  test('should not fallback on non-retryable error', async () => {
+    let attemptCount = 0
+
+    class FailWithBadRequestMiddleware implements Middleware {
+      dispatch(_next: Next): Next {
+        return (_proxy: DefaultProviderProxy) => {
+          attemptCount++
+          // Return 400 error (non-retryable)
+          return Promise.resolve({
+            requestModel: 'gpt-5',
+            requestBody: '{}',
+            unexpectedStatus: 400,
+            responseHeaders: new Headers(),
+            responseBody: JSON.stringify({ error: 'Bad request' }),
+          })
+        }
+      }
+    }
+
+    const ctx = createExecutionContext()
+    const request = new Request<unknown, IncomingRequestCfProperties>('https://example.com/chat/gpt-5', {
+      method: 'POST',
+      headers: { Authorization: 'fallback-test', 'pydantic-ai-gateway-routing-group': 'test-group' },
+      body: JSON.stringify({ model: 'gpt-5', messages: [{ role: 'user', content: 'Hello' }] }),
+    })
+
+    const gatewayEnv = buildGatewayEnv(env, [], fetch, undefined, [new FailWithBadRequestMiddleware()])
+    const response = await gatewayFetch(request, new URL(request.url), ctx, gatewayEnv)
+    await waitOnExecutionContext(ctx)
+
+    // Should fail immediately without trying fallback
+    expect(response.status).toBe(400)
+    expect(attemptCount).toBe(1)
+  })
+
+  test('should return error if all providers fail', async () => {
+    let attemptCount = 0
+
+    class FailAllMiddleware implements Middleware {
+      dispatch(_next: Next): Next {
+        return (_proxy: DefaultProviderProxy) => {
+          attemptCount++
+          // Always return 503
+          return Promise.resolve({
+            requestModel: 'gpt-5',
+            requestBody: '{}',
+            unexpectedStatus: 503,
+            responseHeaders: new Headers(),
+            responseBody: JSON.stringify({ error: 'Service unavailable' }),
+          })
+        }
+      }
+    }
+
+    const ctx = createExecutionContext()
+    const request = new Request<unknown, IncomingRequestCfProperties>('https://example.com/chat/gpt-5', {
+      method: 'POST',
+      headers: { Authorization: 'fallback-test', 'pydantic-ai-gateway-routing-group': 'test-group' },
+      body: JSON.stringify({ model: 'gpt-5', messages: [{ role: 'user', content: 'Hello' }] }),
+    })
+
+    const gatewayEnv = buildGatewayEnv(env, [], fetch, undefined, [new FailAllMiddleware()])
+    const response = await gatewayFetch(request, new URL(request.url), ctx, gatewayEnv)
+    await waitOnExecutionContext(ctx)
+
+    // Should try both providers and fail with last error
+    expect(response.status).toBe(503)
+    expect(attemptCount).toBe(2)
+  })
+})
diff --git a/gateway/test/worker.ts b/gateway/test/worker.ts
@@ -50,6 +50,7 @@ export namespace IDS {
   export const keyHealthy = 4
   export const keyDisabled = 5
   export const keyTinyLimit = 6
+  export const keyFallbackTest = 7
 }
 
 class TestKeysDB extends KeysDbD1 {
@@ -153,6 +154,34 @@ class TestKeysDB extends KeysDbD1 {
           projectSpendingLimitMonthly: 4,
           providers: [this.allProviders[0]!],
         }
+      case 'fallback-test':
+        return {
+          id: IDS.keyFallbackTest,
+          project: IDS.projectDefault,
+          org: IDS.orgDefault,
+          key,
+          status: 'active',
+          providers: [
+            {
+              baseUrl: 'http://test.example.com/provider1',
+              providerId: 'test',
+              injectCost: true,
+              credentials: 'test1',
+              apiTypes: ['chat'],
+              routingGroup: 'test-group',
+              priority: 100,
+            },
+            {
+              baseUrl: 'http://test.example.com/provider2',
+              providerId: 'test',
+              injectCost: true,
+              credentials: 'test2',
+              apiTypes: ['chat'],
+              routingGroup: 'test-group',
+              priority: 50,
+            },
+          ],
+        }
       default:
         return null
     }