From 4421085884bed001e40cf11a7fdf949a0525731e Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Thu, 6 Nov 2025 13:26:23 +0100 Subject: [PATCH 1/3] fix(google-vertex): run anthropic on vertex --- gateway/src/providers/default.ts | 1 + gateway/src/providers/google/index.ts | 38 ++++++++++++-- gateway/test/providers/google.spec.ts | 29 +++++++++++ gateway/test/providers/google.spec.ts.snap | 41 +++++++++++++++ gateway/test/setup.ts | 6 +-- gateway/test/worker.ts | 1 + ...vertex-google-vertex-anthropic-client.yaml | 52 +++++++++++++++++++ proxy-vcr/proxy_vcr/main.py | 32 ++++++++++-- 8 files changed, 189 insertions(+), 11 deletions(-) create mode 100644 proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml diff --git a/gateway/src/providers/default.ts b/gateway/src/providers/default.ts index cffb831..dc0af96 100644 --- a/gateway/src/providers/default.ts +++ b/gateway/src/providers/default.ts @@ -106,6 +106,7 @@ export class DefaultProviderProxy { protected middlewares: Middleware[] protected otelSpan: OtelSpan + protected requestModel: string | null = null // NOTE: Those fields are used only for streaming responses for the time being. protected usage: Usage | null = null protected responseModel: string | null = null diff --git a/gateway/src/providers/google/index.ts b/gateway/src/providers/google/index.ts index 4c2d643..2e131bf 100644 --- a/gateway/src/providers/google/index.ts +++ b/gateway/src/providers/google/index.ts @@ -1,12 +1,13 @@ import type { ModelAPI } from '../../api' import { AnthropicAPI } from '../../api/anthropic' -import { GoogleAPI, type GoogleRequest } from '../../api/google' -import { DefaultProviderProxy } from '../default' +import { GoogleAPI } from '../../api/google' +import { DefaultProviderProxy, type JsonData } from '../default' import { authToken, getProjectId } from './auth' export class GoogleVertexProvider extends DefaultProviderProxy { protected usageField = 'usageMetadata' flavor: 'default' | 'anthropic' = 'default' + shouldStream: boolean = false url() { if (this.providerProxy.baseUrl) { @@ -54,6 +55,15 @@ export class GoogleVertexProvider extends DefaultProviderProxy { * @param region - The region to replace in the path. */ private replacePath(projectId: string, region: string): null | string { + const pathWithoutQuery = this.restOfPath.split('?')[0] + console.log('pathWithoutQuery', pathWithoutQuery) + if (pathWithoutQuery === 'v1/messages') { + console.log('this.shouldStream', this.shouldStream) + console.log('this.requestModel', this.requestModel) + const action = this.shouldStream ? 'streamRawPredict' : 'rawPredict' + return `/v1/projects/${projectId}/locations/${region}/publishers/anthropic/models/${this.requestModel}:${action}` + } + // Regex with capture groups: version (optional), publisher (optional), model // Path may or may not start with / and may or may not have version const regex = @@ -73,17 +83,30 @@ export class GoogleVertexProvider extends DefaultProviderProxy { } const path = `/${version}/projects/${projectId}/locations/${region}/publishers/${publisher}/models/${modelAndApi}` + console.log('this.restOfPath', this.restOfPath) return path } async prepRequest() { const requestBodyText = await this.request.text() - let requestBodyData: GoogleRequest + let requestBodyData: JsonData try { requestBodyData = JSON.parse(requestBodyText) } catch (_error) { return { error: 'invalid request JSON' } } + + const pathWithoutQuery = this.restOfPath.split('?')[0] + if (pathWithoutQuery === 'v1/messages') { + console.log('this is called after!') + this.flavor = 'anthropic' + if (!('model' in requestBodyData)) { + return { error: 'model not found in Anthropic request body' } + } + this.requestModel = requestBodyData.model as string + return { requestBodyText, requestBodyData, requestModel: this.requestModel } + } + const m = /\/models\/(.+?):/.exec(this.restOfPath) if (m) { return { requestBodyText, requestBodyData, requestModel: m[1] } @@ -92,6 +115,15 @@ export class GoogleVertexProvider extends DefaultProviderProxy { } } + protected isStreaming(responseHeaders: Headers, requestBodyData: object): boolean { + if (this.flavor === 'anthropic') { + this.shouldStream = !!('stream' in requestBodyData && requestBodyData.stream === true) + } else { + this.shouldStream = super.isStreaming(responseHeaders, requestBodyData) + } + return this.shouldStream + } + async requestHeaders(headers: Headers): Promise { const token = await authToken(this.providerProxy.credentials, this.options.kv, this.options.subFetch) headers.set('Authorization', `Bearer ${token}`) diff --git a/gateway/test/providers/google.spec.ts b/gateway/test/providers/google.spec.ts index 1daa062..4bd8b71 100644 --- a/gateway/test/providers/google.spec.ts +++ b/gateway/test/providers/google.spec.ts @@ -79,4 +79,33 @@ describe('google', () => { expect(otelBatch, 'otelBatch length not 1').toHaveLength(1) expect(deserializeRequest(otelBatch[0]!)).toMatchSnapshot('span') }) + + test('google-vertex/anthropic-client', async ({ gateway }) => { + const { fetch, otelBatch } = gateway + + const anthropicBody = JSON.stringify({ + model: 'claude-sonnet-4', + max_tokens: 1024, + messages: [{ role: 'user', content: 'What is the capital of Brazil?' }], + }) + + const response = await fetch('https://example.com/anthropic/v1/messages', { + method: 'POST', + headers: { + Authorization: 'healthy', + 'content-type': 'application/json', + 'content-length': anthropicBody.length.toString(), + 'x-vcr-filename': 'google-vertex-anthropic-client', + 'pydantic-ai-gateway-profile': 'google-vertex', + 'anthropic-version': 'vertex-2023-10-16', + }, + body: anthropicBody, + }) + + const content = await response.text() + + expect(content).toMatchSnapshot('llm') + expect(otelBatch, 'otelBatch length not 1').toHaveLength(1) + expect(deserializeRequest(otelBatch[0]!)).toMatchSnapshot('span') + }) }) diff --git a/gateway/test/providers/google.spec.ts.snap b/gateway/test/providers/google.spec.ts.snap index a55329e..6750b83 100644 --- a/gateway/test/providers/google.spec.ts.snap +++ b/gateway/test/providers/google.spec.ts.snap @@ -1,5 +1,46 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html +exports[`google > google-vertex/anthropic-client > llm 1`] = `"{"error":{"code":404,"message":"Requested entity was not found.","status":"NOT_FOUND"}}"`; + +exports[`google > google-vertex/anthropic-client > span 1`] = ` +[ + { + "attributes": { + "http.request.body.text": "{"model":"claude-sonnet-4","max_tokens":1024,"messages":[{"role":"user","content":"What is the capital of Brazil?"}]}", + "http.request.header.anthropic-version": "vertex-2023-10-16", + "http.request.header.authorization": "healthy", + "http.request.header.content-length": "117", + "http.request.header.content-type": "application/json", + "http.request.header.pydantic-ai-gateway-profile": "google-vertex", + "http.request.header.x-vcr-filename": "google-vertex-anthropic-client", + "http.request.method": "POST", + "http.response.body.text": "{"error":{"code":404,"message":"Requested entity was not found.","status":"NOT_FOUND"}}", + "http.response.header.content-length": "87", + "http.response.header.content-type": "application/json", + "http.response.header.server": "uvicorn", + "http.response.status_code": 404, + "logfire.json_schema": "{"type":"object","properties":{"http.request.method":{"type":"string"},"url.full":{"type":"string"},"http.request.header.anthropic-version":{"type":"string"},"http.request.header.authorization":{"type":"string"},"http.request.header.content-length":{"type":"string"},"http.request.header.content-type":{"type":"string"},"http.request.header.pydantic-ai-gateway-profile":{"type":"string"},"http.request.header.x-vcr-filename":{"type":"string"},"http.response.status_code":{"type":"number"},"http.response.header.content-length":{"type":"string"},"http.response.header.content-type":{"type":"string"},"http.response.header.server":{"type":"string"},"http.request.body.text":{"type":"string"},"http.response.body.text":{"type":"string"}}}", + "logfire.level_num": 13, + "logfire.msg": "chat claude-sonnet-4, unexpected response: 404", + "url.full": "https://example.com/anthropic/v1/messages", + }, + "events": [], + "kind": 1, + "links": [], + "name": "chat claude-sonnet-4, unexpected response: {http.response.status_code}", + "parentSpanId": undefined, + "resource": { + "service.name": "PAIG", + "service.version": "test", + }, + "scope": "pydantic-ai-gateway", + "status": { + "code": 1, + }, + }, +] +`; + exports[`google > google-vertex/default > llm 1`] = ` "data: {"candidates": [{"content": {"role": "model","parts": [{"functionCall": {"name": "final_result","args": {"name": "Samuel","city": "London","dob": "1987-01-28"}},"thoughtSignature": "CswDAePx/15lDrSIcIjN85FpyyOl3oASu2R23QD4Z3cj5XRUKOD/3/mMqcHv5AeXc/L+P1eVNpq5C3xM9/8zil6gOOZI91F/r1kmKvxmCECD1SW+p1dtzG6eljHd51vd2Gx7eqtEek1ORzeLP4zSWY2GDlZA9fZIs/uIfhLyOlkiiB1P/GAAUmBPT/TZqdOpQZXBt8MAUrbTOQfbhQ1qbxdrYRveZRMzXS898K6NmjrN5quNiaUgwEbc2J6NAoDOl5jdK8tIt7m25qdpjSYMpAGYD0c0Le2yPf8eO6A9J6zYp1lqVCTifby4/nP5RkVM2e1L4pH6oYisgQsyDUSEDCSG2GXhGO9WU5wFYwkjlhB8ghLU92kVgr/Rq54K/0GaYGBgzKi+YrD8c6QKSTZWTw/46D8lQ1goL1Y1YdtRiRGNFRJqKmSbkxjIOQoIk4glxcsm7L2lucL4miz4yZioBe7HEPDtmQY0FEKM5DQhwuj+AF1bBl5WE/9NYUsuGY3DxSi0lVd2v8+zd0op9U8z3PX4cmdKVkf/kA9TuQSPXBZpYgj7CssoMBT4ssLVoUgetOrNvJL99liSvsPOBZaLDKpNTXQA9GdZvQltoSolOg=="}]},"finishReason": "STOP","avgLogprobs": -0.48033348719278973}],"usageMetadata": {"promptTokenCount": 79,"candidatesTokenCount": 18,"totalTokenCount": 240,"trafficType": "ON_DEMAND","promptTokensDetails": [{"modality": "TEXT","tokenCount": 79}],"candidatesTokensDetails": [{"modality": "TEXT","tokenCount": 18}],"thoughtsTokenCount": 143},"modelVersion": "gemini-2.5-flash","createTime": "2025-11-04T15:31:56.289691Z","responseId": "bBwKaZvXEemDn9kP56mN8QI"} diff --git a/gateway/test/setup.ts b/gateway/test/setup.ts index 334ee5e..bea3cd7 100644 --- a/gateway/test/setup.ts +++ b/gateway/test/setup.ts @@ -52,9 +52,9 @@ function testGateway(): TestGateway { const bodyArray = init?.body as Uint8Array otelBatch.push(new TextDecoder().decode(bodyArray)) return new Response('OK', { status: 200 }) - } else if (hostname === 'oauth2.googleapis.com') { - // Mock GCP token response for tests - return new Response(JSON.stringify({ access_token: 'mock-gcp-token' }), { status: 200 }) + // } else if (hostname === 'oauth2.googleapis.com') { + // // Mock GCP token response for tests + // return new Response(JSON.stringify({ access_token: 'mock-gcp-token' }), { status: 200 }) } else { return await fetch(url, init) } diff --git a/gateway/test/worker.ts b/gateway/test/worker.ts index 955add7..927a429 100644 --- a/gateway/test/worker.ts +++ b/gateway/test/worker.ts @@ -106,6 +106,7 @@ class TestKeysDB extends KeysDbD1 { injectCost: true, credentials: env.GOOGLE_SERVICE_ACCOUNT_KEY, apiTypes: ['gemini', 'anthropic'], + profile: 'google-vertex', }, ] } diff --git a/proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml b/proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml new file mode 100644 index 0000000..3faf39d --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml @@ -0,0 +1,52 @@ +interactions: +- request: + body: '{"model":"claude-sonnet-4","max_tokens":1024,"messages":[{"role":"user","content":"What + is the capital of Brazil?"}]}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '117' + host: + - aiplatform.googleapis.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://aiplatform.googleapis.com/v1/projects/pydantic-ai/locations/global/publishers/anthropic/models/null:rawPredict? + response: + body: + string: !!binary | + H4sIAAAAAAAC/y2LMQqAMAwA974iZBZxcHIWRwXRWYqN4mCLTYqI9O9WdLw77lYASN47jxXcCRLO + zlCisiizT+zErNfXYU9HIBYyQFY2ueDUDNYJLC5Yk+N/sGgJ/A5tN0xNN7Y1phJVVA+26ZsFcgAA + AA== + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 06 Nov 2025 12:13:49 GMT + Server: + - scaffolding on HTTPServer2 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 404 + message: Not Found +version: 1 diff --git a/proxy-vcr/proxy_vcr/main.py b/proxy-vcr/proxy_vcr/main.py index 6ea9f23..d224333 100644 --- a/proxy-vcr/proxy_vcr/main.py +++ b/proxy-vcr/proxy_vcr/main.py @@ -47,19 +47,21 @@ async def proxy(request: Request) -> Response: # We should cache based on request body content, so we should make a hash of the request body. vcr_suffix = request.headers.get('x-vcr-filename', hashlib.sha256(body).hexdigest()) - if request.url.path.startswith('/openai'): + provider = select_provider(request) + + if provider == 'openai': client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) url = OPENAI_BASE_URL + request.url.path.strip('/openai') with vcr.use_cassette(cassette_name('openai', vcr_suffix)): # type: ignore[reportUnknownReturnType] headers = {'Authorization': auth_header, 'content-type': 'application/json'} response = await client.post(url, content=body, headers=headers) - elif request.url.path.startswith('/groq'): + elif provider == 'groq': client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) url = GROQ_BASE_URL + request.url.path[len('/groq') :] with vcr.use_cassette(cassette_name('groq', vcr_suffix)): # type: ignore[reportUnknownReturnType] headers = {'Authorization': auth_header, 'content-type': 'application/json'} response = await client.post(url, content=body, headers=headers) - elif request.url.path.startswith('/bedrock'): + elif provider == 'bedrock': client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) url = BEDROCK_BASE_URL + request.url.path[len('/bedrock') :] with vcr.use_cassette(cassette_name('bedrock', vcr_suffix)): # type: ignore[reportUnknownReturnType] @@ -69,7 +71,7 @@ async def proxy(request: Request) -> Response: 'x-amz-security-token': auth_header.replace('Bearer ', ''), } response = await client.post(url, content=body, headers=headers) - elif request.url.path.startswith('/anthropic'): + elif provider == 'anthropic': client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) url = ANTHROPIC_BASE_URL + request.url.path[len('/anthropic') :] api_key = request.headers.get('x-api-key', '') @@ -85,7 +87,7 @@ async def proxy(request: Request) -> Response: **anthropic_beta_headers, } response = await client.post(url, content=body, headers=headers) - elif request.url.path.startswith('/gemini'): + elif provider == 'google-vertex': client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) url = GOOGLE_BASE_URL + request.url.path[len('/gemini') :] + '?' + request.url.query headers = {'Authorization': auth_header, 'host': 'aiplatform.googleapis.com'} @@ -131,3 +133,23 @@ async def health_check(_: Request) -> Response: def cassette_name(provider: str, vcr_suffix: str) -> str: return f'{provider}-{vcr_suffix}.yaml' + + +def select_provider(request: Request) -> str: + vcr_filename = request.headers.get('x-vcr-filename', '') + + if vcr_filename == 'google-vertex-anthropic-client': + return 'google-vertex' + + if request.url.path.startswith('/openai'): + return 'openai' + elif request.url.path.startswith('/groq'): + return 'groq' + elif request.url.path.startswith('/bedrock'): + return 'bedrock' + elif request.url.path.startswith('/anthropic'): + return 'anthropic' + elif request.url.path.startswith('/gemini'): + return 'google-vertex' + else: + raise HTTPException(status_code=404, detail=f'Path {request.url.path} not supported') From 4f3e49161568aad01af7a3df5815568b8e2d18de Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Thu, 6 Nov 2025 13:35:54 +0100 Subject: [PATCH 2/3] Support Anthropic on Google Vertex --- gateway/src/providers/default.ts | 11 +-- gateway/src/providers/google/index.ts | 24 ++++--- gateway/test/providers/google.spec.ts | 2 +- gateway/test/providers/google.spec.ts.snap | 72 ++++++++++++++----- ...vertex-google-vertex-anthropic-client.yaml | 45 ++++++++---- proxy-vcr/proxy_vcr/main.py | 14 +++- 6 files changed, 118 insertions(+), 50 deletions(-) diff --git a/gateway/src/providers/default.ts b/gateway/src/providers/default.ts index dc0af96..a16554f 100644 --- a/gateway/src/providers/default.ts +++ b/gateway/src/providers/default.ts @@ -275,6 +275,12 @@ export class DefaultProviderProxy { return checkResult } + const prepResult = await this.prepRequest() + if ('error' in prepResult) { + return prepResult + } + const { requestBodyText, requestBodyData, requestModel } = prepResult + const method = this.method() const url = this.url() if (typeof url === 'object') { @@ -287,11 +293,6 @@ export class DefaultProviderProxy { requestHeaders.delete('authorization') await this.requestHeaders(requestHeaders) - const prepResult = await this.prepRequest() - if ('error' in prepResult) { - return prepResult - } - const { requestBodyText, requestBodyData, requestModel } = prepResult const response = await this.fetch(url, { method, headers: requestHeaders, body: requestBodyText }) if (this.isWhitelistedEndpoint()) { diff --git a/gateway/src/providers/google/index.ts b/gateway/src/providers/google/index.ts index 2e131bf..fe2fe4a 100644 --- a/gateway/src/providers/google/index.ts +++ b/gateway/src/providers/google/index.ts @@ -56,11 +56,11 @@ export class GoogleVertexProvider extends DefaultProviderProxy { */ private replacePath(projectId: string, region: string): null | string { const pathWithoutQuery = this.restOfPath.split('?')[0] - console.log('pathWithoutQuery', pathWithoutQuery) - if (pathWithoutQuery === 'v1/messages') { - console.log('this.shouldStream', this.shouldStream) - console.log('this.requestModel', this.requestModel) - const action = this.shouldStream ? 'streamRawPredict' : 'rawPredict' + + // Handle Anthropic client format: /v1/messages + if (pathWithoutQuery === 'v1/messages' && this.requestModel) { + // Always use streamRawPredict for Anthropic on Vertex (it handles both streaming and non-streaming) + const action = 'streamRawPredict' return `/v1/projects/${projectId}/locations/${region}/publishers/anthropic/models/${this.requestModel}:${action}` } @@ -83,7 +83,6 @@ export class GoogleVertexProvider extends DefaultProviderProxy { } const path = `/${version}/projects/${projectId}/locations/${region}/publishers/${publisher}/models/${modelAndApi}` - console.log('this.restOfPath', this.restOfPath) return path } @@ -98,13 +97,20 @@ export class GoogleVertexProvider extends DefaultProviderProxy { const pathWithoutQuery = this.restOfPath.split('?')[0] if (pathWithoutQuery === 'v1/messages') { - console.log('this is called after!') this.flavor = 'anthropic' if (!('model' in requestBodyData)) { return { error: 'model not found in Anthropic request body' } } - this.requestModel = requestBodyData.model as string - return { requestBodyText, requestBodyData, requestModel: this.requestModel } + const model = requestBodyData.model as string + this.requestModel = model + + // Remove the model from the request body since Google Vertex doesn't expect it + delete requestBodyData.model + + // Update requestBodyText without the model field + const updatedRequestBodyText = JSON.stringify(requestBodyData) + + return { requestBodyText: updatedRequestBodyText, requestBodyData, requestModel: model } } const m = /\/models\/(.+?):/.exec(this.restOfPath) diff --git a/gateway/test/providers/google.spec.ts b/gateway/test/providers/google.spec.ts index 4bd8b71..40ab359 100644 --- a/gateway/test/providers/google.spec.ts +++ b/gateway/test/providers/google.spec.ts @@ -87,6 +87,7 @@ describe('google', () => { model: 'claude-sonnet-4', max_tokens: 1024, messages: [{ role: 'user', content: 'What is the capital of Brazil?' }], + anthropic_version: 'vertex-2023-10-16', }) const response = await fetch('https://example.com/anthropic/v1/messages', { @@ -97,7 +98,6 @@ describe('google', () => { 'content-length': anthropicBody.length.toString(), 'x-vcr-filename': 'google-vertex-anthropic-client', 'pydantic-ai-gateway-profile': 'google-vertex', - 'anthropic-version': 'vertex-2023-10-16', }, body: anthropicBody, }) diff --git a/gateway/test/providers/google.spec.ts.snap b/gateway/test/providers/google.spec.ts.snap index 6750b83..7575b53 100644 --- a/gateway/test/providers/google.spec.ts.snap +++ b/gateway/test/providers/google.spec.ts.snap @@ -1,33 +1,67 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`google > google-vertex/anthropic-client > llm 1`] = `"{"error":{"code":404,"message":"Requested entity was not found.","status":"NOT_FOUND"}}"`; +exports[`google > google-vertex/anthropic-client > llm 1`] = `"{"model":"claude-sonnet-4-20250514","id":"msg_vrtx_01HqMgFLT1sMDSi7RpSM8tFc","type":"message","role":"assistant","content":[{"type":"text","text":"The capital of Brazil is Brasília. It became the country's capital in 1960, replacing Rio de Janeiro, and was specifically built to serve as the new capital city."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":14,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":40}}"`; exports[`google > google-vertex/anthropic-client > span 1`] = ` [ { "attributes": { - "http.request.body.text": "{"model":"claude-sonnet-4","max_tokens":1024,"messages":[{"role":"user","content":"What is the capital of Brazil?"}]}", - "http.request.header.anthropic-version": "vertex-2023-10-16", - "http.request.header.authorization": "healthy", - "http.request.header.content-length": "117", - "http.request.header.content-type": "application/json", - "http.request.header.pydantic-ai-gateway-profile": "google-vertex", - "http.request.header.x-vcr-filename": "google-vertex-anthropic-client", - "http.request.method": "POST", - "http.response.body.text": "{"error":{"code":404,"message":"Requested entity was not found.","status":"NOT_FOUND"}}", - "http.response.header.content-length": "87", - "http.response.header.content-type": "application/json", - "http.response.header.server": "uvicorn", - "http.response.status_code": 404, - "logfire.json_schema": "{"type":"object","properties":{"http.request.method":{"type":"string"},"url.full":{"type":"string"},"http.request.header.anthropic-version":{"type":"string"},"http.request.header.authorization":{"type":"string"},"http.request.header.content-length":{"type":"string"},"http.request.header.content-type":{"type":"string"},"http.request.header.pydantic-ai-gateway-profile":{"type":"string"},"http.request.header.x-vcr-filename":{"type":"string"},"http.response.status_code":{"type":"number"},"http.response.header.content-length":{"type":"string"},"http.response.header.content-type":{"type":"string"},"http.response.header.server":{"type":"string"},"http.request.body.text":{"type":"string"},"http.response.body.text":{"type":"string"}}}", - "logfire.level_num": 13, - "logfire.msg": "chat claude-sonnet-4, unexpected response: 404", - "url.full": "https://example.com/anthropic/v1/messages", + "gen_ai.input.messages": [ + { + "parts": [ + { + "content": "What is the capital of Brazil?", + "type": "text", + }, + ], + "role": "user", + }, + ], + "gen_ai.operation.name": "chat", + "gen_ai.output.messages": [ + { + "finish_reason": "end_turn", + "parts": [ + { + "content": "The capital of Brazil is Brasília. It became the country's capital in 1960, replacing Rio de Janeiro, and was specifically built to serve as the new capital city.", + "type": "text", + }, + ], + "role": "assistant", + }, + ], + "gen_ai.request.max_tokens": 1024, + "gen_ai.request.model": "claude-sonnet-4", + "gen_ai.request.seed": {}, + "gen_ai.request.stop_sequences": {}, + "gen_ai.request.temperature": {}, + "gen_ai.request.top_k": {}, + "gen_ai.request.top_p": {}, + "gen_ai.response.finish_reasons": [ + "end_turn", + ], + "gen_ai.response.id": "msg_vrtx_01HqMgFLT1sMDSi7RpSM8tFc", + "gen_ai.response.model": "claude-sonnet-4-20250514", + "gen_ai.system": "google-vertex", + "gen_ai.system_instructions": {}, + "gen_ai.usage.cache_audio_read_tokens": {}, + "gen_ai.usage.cache_read_tokens": 0, + "gen_ai.usage.cache_write_tokens": 0, + "gen_ai.usage.input_audio_tokens": {}, + "gen_ai.usage.input_tokens": 14, + "gen_ai.usage.output_audio_tokens": {}, + "gen_ai.usage.output_tokens": 40, + "http.request.body.text": "{"max_tokens":1024,"messages":[{"role":"user","content":"What is the capital of Brazil?"}],"anthropic_version":"vertex-2023-10-16"}", + "http.response.body.text": "{"model":"claude-sonnet-4-20250514","id":"msg_vrtx_01HqMgFLT1sMDSi7RpSM8tFc","type":"message","role":"assistant","content":[{"type":"text","text":"The capital of Brazil is Brasília. It became the country's capital in 1960, replacing Rio de Janeiro, and was specifically built to serve as the new capital city."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":14,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":40}}", + "http.response.status_code": 200, + "logfire.json_schema": "{"type":"object","properties":{"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.system":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.request.top_k":{},"gen_ai.request.top_p":{},"gen_ai.request.temperature":{},"gen_ai.request.stop_sequences":{},"gen_ai.request.seed":{},"gen_ai.response.finish_reasons":{},"gen_ai.response.id":{"type":"string"},"gen_ai.input.messages":{},"gen_ai.output.messages":{},"gen_ai.system_instructions":{},"http.response.status_code":{"type":"number"},"http.request.body.text":{"type":"string"},"http.response.body.text":{"type":"string"},"gen_ai.response.model":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{"type":"number"},"gen_ai.usage.output_tokens":{"type":"number"},"gen_ai.usage.input_audio_tokens":{},"gen_ai.usage.cache_audio_read_tokens":{},"gen_ai.usage.output_audio_tokens":{}}}", + "logfire.level_num": 9, + "logfire.msg": "chat claude-sonnet-4-20250514", }, "events": [], "kind": 1, "links": [], - "name": "chat claude-sonnet-4, unexpected response: {http.response.status_code}", + "name": "chat claude-sonnet-4-20250514", "parentSpanId": undefined, "resource": { "service.name": "PAIG", diff --git a/proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml b/proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml index 3faf39d..7c4dfe4 100644 --- a/proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml +++ b/proxy-vcr/proxy_vcr/cassettes/google-vertex-google-vertex-anthropic-client.yaml @@ -1,39 +1,40 @@ interactions: - request: - body: '{"model":"claude-sonnet-4","max_tokens":1024,"messages":[{"role":"user","content":"What - is the capital of Brazil?"}]}' + body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the capital + of Brazil?"}],"anthropic_version":"vertex-2023-10-16"}' headers: accept: - '*/*' accept-encoding: - gzip, deflate + anthropic-version: + - vertex-2023-10-16 connection: - keep-alive content-length: - - '117' + - '131' host: - aiplatform.googleapis.com user-agent: - python-httpx/0.28.1 method: POST - uri: https://aiplatform.googleapis.com/v1/projects/pydantic-ai/locations/global/publishers/anthropic/models/null:rawPredict? + uri: https://aiplatform.googleapis.com/v1/projects/pydantic-ai/locations/global/publishers/anthropic/models/claude-sonnet-4:streamRawPredict response: body: string: !!binary | - H4sIAAAAAAAC/y2LMQqAMAwA974iZBZxcHIWRwXRWYqN4mCLTYqI9O9WdLw77lYASN47jxXcCRLO - zlCisiizT+zErNfXYU9HIBYyQFY2ueDUDNYJLC5Yk+N/sGgJ/A5tN0xNN7Y1phJVVA+26ZsFcgAA - AA== + H4sIAAAAAAAC/2SQTU4DMQyFr2Jlw2aKZlD5myVCFSC6KewQGqUZ01qkzjR2KKXqkTgFFyNT0QrE + ytb7np+StzGL0KI3tXHephYHEphRB8PBSXlyWp5WQ1MYajNfyKx5i/relNXNcjwb3T9WMr5+oPNJ + 9zC+0JHLRl132FtRxM4wCzH4XrAiJGpZs+QCK+atftrs/YrvPdmN2jzOEZztSK2H8AJX0X6QB5J+ + k69PT/YYbhWm6OwCQXt3SKxxfSSHO2KoLs/KAiJ23jriGUwoQItwZxkphgIst7CyAtKhoxdy1vs1 + TBN5BQ0gGN8QMu7zGVeHZEe6Pjbb58KIhq6JaHNj+dXIbaMpsvkBgsuE7PL3OHlfmLRrpN4Y4i5p + o+EVWUxdDXMj1s2xcTlKKXDz11DuecbtfxaS/laG5Xb7DQAA//8DAC9RKlfTAQAA headers: Alt-Svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 Content-Encoding: - gzip Content-Type: - - application/json; charset=UTF-8 - Date: - - Thu, 06 Nov 2025 12:13:49 GMT - Server: - - scaffolding on HTTPServer2 + - application/json Transfer-Encoding: - chunked Vary: @@ -46,7 +47,23 @@ interactions: - SAMEORIGIN X-XSS-Protection: - '0' + date: + - Thu, 06 Nov 2025 12:34:32 GMT + request-id: + - req_vrtx_011CUreHywvBewqNvth7FFMK + server: + - hypercorn-h11 + x-vertex-ai-cache-creation-input-tokens: + - '0' + x-vertex-ai-cache-read-input-tokens: + - '0' + x-vertex-ai-input-tokens: + - '14' + x-vertex-ai-output-tokens: + - '40' + x-vertex-ai-received-request-id: + - 65cdc8b3-f380-49b1-953b-85431149ff5c status: - code: 404 - message: Not Found + code: 200 + message: OK version: 1 diff --git a/proxy-vcr/proxy_vcr/main.py b/proxy-vcr/proxy_vcr/main.py index d224333..c4714af 100644 --- a/proxy-vcr/proxy_vcr/main.py +++ b/proxy-vcr/proxy_vcr/main.py @@ -89,9 +89,19 @@ async def proxy(request: Request) -> Response: response = await client.post(url, content=body, headers=headers) elif provider == 'google-vertex': client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) - url = GOOGLE_BASE_URL + request.url.path[len('/gemini') :] + '?' + request.url.query - headers = {'Authorization': auth_header, 'host': 'aiplatform.googleapis.com'} + url = ( + GOOGLE_BASE_URL + + request.url.path[len('/gemini') :] + + ('?' + request.url.query if request.url.query else '') + ) + print(request.headers.get('anthropic-version')) + headers = { + 'Authorization': auth_header, + 'host': 'aiplatform.googleapis.com', + 'anthropic-version': request.headers.get('anthropic-version', 'vertex-2023-10-16'), + } # It's a bit weird, but if we don't set the host header, it will fail. This seems very weird from Google's side. + print(url) with vcr.use_cassette(cassette_name('google-vertex', vcr_suffix)): # type: ignore[reportUnknownReturnType] response = await client.post(url, content=body, headers=headers) else: From 19addb934b1e63e53468089d618b8fc31e7110bd Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Fri, 7 Nov 2025 11:54:10 +0100 Subject: [PATCH 3/3] Add note and uncomment --- gateway/test/setup.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gateway/test/setup.ts b/gateway/test/setup.ts index bea3cd7..70f892f 100644 --- a/gateway/test/setup.ts +++ b/gateway/test/setup.ts @@ -52,9 +52,10 @@ function testGateway(): TestGateway { const bodyArray = init?.body as Uint8Array otelBatch.push(new TextDecoder().decode(bodyArray)) return new Response('OK', { status: 200 }) - // } else if (hostname === 'oauth2.googleapis.com') { - // // Mock GCP token response for tests - // return new Response(JSON.stringify({ access_token: 'mock-gcp-token' }), { status: 200 }) + // This line needs to be disabled when creating the cassettes locally. + } else if (hostname === 'oauth2.googleapis.com') { + // Mock GCP token response for tests + return new Response(JSON.stringify({ access_token: 'mock-gcp-token' }), { status: 200 }) } else { return await fetch(url, init) }