Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions gateway/src/providers/default.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ export class DefaultProviderProxy {
protected middlewares: Middleware[]
protected otelSpan: OtelSpan

protected requestModel: string | null = null
// NOTE: These fields are currently used only for streaming responses.
protected usage: Usage | null = null
protected responseModel: string | null = null
Expand Down Expand Up @@ -274,6 +275,12 @@ export class DefaultProviderProxy {
return checkResult
}

const prepResult = await this.prepRequest()
if ('error' in prepResult) {
return prepResult
}
const { requestBodyText, requestBodyData, requestModel } = prepResult
Comment on lines +278 to +282
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to cause any issues. I just moved it up.


const method = this.method()
const url = this.url()
if (typeof url === 'object') {
Expand All @@ -286,11 +293,6 @@ export class DefaultProviderProxy {
requestHeaders.delete('authorization')
await this.requestHeaders(requestHeaders)

const prepResult = await this.prepRequest()
if ('error' in prepResult) {
return prepResult
}
const { requestBodyText, requestBodyData, requestModel } = prepResult
const response = await this.fetch(url, { method, headers: requestHeaders, body: requestBodyText })

if (this.isWhitelistedEndpoint()) {
Expand Down
44 changes: 41 additions & 3 deletions gateway/src/providers/google/index.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import type { ModelAPI } from '../../api'
import { AnthropicAPI } from '../../api/anthropic'
import { GoogleAPI, type GoogleRequest } from '../../api/google'
import { DefaultProviderProxy } from '../default'
import { GoogleAPI } from '../../api/google'
import { DefaultProviderProxy, type JsonData } from '../default'
import { authToken, getProjectId } from './auth'

export class GoogleVertexProvider extends DefaultProviderProxy {
protected usageField = 'usageMetadata'
flavor: 'default' | 'anthropic' = 'default'
shouldStream: boolean = false

url() {
if (this.providerProxy.baseUrl) {
Expand Down Expand Up @@ -54,6 +55,15 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
* @param region - The region to replace in the path.
*/
private replacePath(projectId: string, region: string): null | string {
const pathWithoutQuery = this.restOfPath.split('?')[0]

// Handle Anthropic client format: /v1/messages
if (pathWithoutQuery === 'v1/messages' && this.requestModel) {
// Always use streamRawPredict for Anthropic on Vertex (it handles both streaming and non-streaming)
const action = 'streamRawPredict'
return `/v1/projects/${projectId}/locations/${region}/publishers/anthropic/models/${this.requestModel}:${action}`
}

// Regex with capture groups: version (optional), publisher (optional), model
// Path may or may not start with / and may or may not have version
const regex =
Expand All @@ -78,12 +88,31 @@ export class GoogleVertexProvider extends DefaultProviderProxy {

async prepRequest() {
const requestBodyText = await this.request.text()
let requestBodyData: GoogleRequest
let requestBodyData: JsonData
try {
requestBodyData = JSON.parse(requestBodyText)
} catch (_error) {
return { error: 'invalid request JSON' }
}

const pathWithoutQuery = this.restOfPath.split('?')[0]
if (pathWithoutQuery === 'v1/messages') {
this.flavor = 'anthropic'
if (!('model' in requestBodyData)) {
return { error: 'model not found in Anthropic request body' }
}
const model = requestBodyData.model as string
this.requestModel = model

// Remove the model from the request body since Google Vertex doesn't expect it
delete requestBodyData.model

// Update requestBodyText without the model field
const updatedRequestBodyText = JSON.stringify(requestBodyData)

return { requestBodyText: updatedRequestBodyText, requestBodyData, requestModel: model }
}
Comment on lines +98 to +114
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we okay with this level of intrusiveness?


const m = /\/models\/(.+?):/.exec(this.restOfPath)
if (m) {
return { requestBodyText, requestBodyData, requestModel: m[1] }
Expand All @@ -92,6 +121,15 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
}
}

/**
 * Decides whether the current request should be treated as a streaming one.
 *
 * For the `anthropic` flavor the decision is taken from the request body alone:
 * streaming is on only when the body carries `stream: true`. For every other
 * flavor the decision is delegated to the base class implementation.
 *
 * The result is also stored on `this.shouldStream` before being returned.
 *
 * @param responseHeaders - Headers of the upstream response (only forwarded to the base class).
 * @param requestBodyData - Parsed request body.
 * @returns `true` when the response should be handled as a stream.
 */
protected isStreaming(responseHeaders: Headers, requestBodyData: object): boolean {
  const streaming =
    this.flavor === 'anthropic'
      ? 'stream' in requestBodyData && requestBodyData.stream === true
      : super.isStreaming(responseHeaders, requestBodyData)

  this.shouldStream = streaming
  return streaming
}

async requestHeaders(headers: Headers): Promise<void> {
const token = await authToken(this.providerProxy.credentials, this.options.kv, this.options.subFetch)
headers.set('Authorization', `Bearer ${token}`)
Expand Down
29 changes: 29 additions & 0 deletions gateway/test/providers/google.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,33 @@ describe('google', () => {
expect(otelBatch, 'otelBatch length not 1').toHaveLength(1)
expect(deserializeRequest(otelBatch[0]!)).toMatchSnapshot('span')
})

// Sends an Anthropic-style request (model in the body, `/anthropic/v1/messages` path)
// with the `google-vertex` gateway profile, then snapshots the response text and the
// single OTel span the gateway emitted.
test('google-vertex/anthropic-client', async ({ gateway }) => {
  const { fetch, otelBatch } = gateway

  // Key order matters: the serialized body is what gets recorded and snapshotted.
  const payload = {
    model: 'claude-sonnet-4',
    max_tokens: 1024,
    messages: [{ role: 'user', content: 'What is the capital of Brazil?' }],
    anthropic_version: 'vertex-2023-10-16',
  }
  const anthropicBody = JSON.stringify(payload)

  const headers = {
    Authorization: 'healthy',
    'content-type': 'application/json',
    'content-length': String(anthropicBody.length),
    'x-vcr-filename': 'google-vertex-anthropic-client',
    'pydantic-ai-gateway-profile': 'google-vertex',
  }

  const response = await fetch('https://example.com/anthropic/v1/messages', {
    method: 'POST',
    headers,
    body: anthropicBody,
  })
  const content = await response.text()

  expect(content).toMatchSnapshot('llm')
  expect(otelBatch, 'otelBatch length not 1').toHaveLength(1)

  const span = deserializeRequest(otelBatch[0]!)
  expect(span).toMatchSnapshot('span')
})
})
75 changes: 75 additions & 0 deletions gateway/test/providers/google.spec.ts.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,80 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

exports[`google > google-vertex/anthropic-client > llm 1`] = `"{"model":"claude-sonnet-4-20250514","id":"msg_vrtx_01HqMgFLT1sMDSi7RpSM8tFc","type":"message","role":"assistant","content":[{"type":"text","text":"The capital of Brazil is Brasília. It became the country's capital in 1960, replacing Rio de Janeiro, and was specifically built to serve as the new capital city."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":14,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":40}}"`;

exports[`google > google-vertex/anthropic-client > span 1`] = `
[
{
"attributes": {
"gen_ai.input.messages": [
{
"parts": [
{
"content": "What is the capital of Brazil?",
"type": "text",
},
],
"role": "user",
},
],
"gen_ai.operation.name": "chat",
"gen_ai.output.messages": [
{
"finish_reason": "end_turn",
"parts": [
{
"content": "The capital of Brazil is Brasília. It became the country's capital in 1960, replacing Rio de Janeiro, and was specifically built to serve as the new capital city.",
"type": "text",
},
],
"role": "assistant",
},
],
"gen_ai.request.max_tokens": 1024,
"gen_ai.request.model": "claude-sonnet-4",
"gen_ai.request.seed": {},
"gen_ai.request.stop_sequences": {},
"gen_ai.request.temperature": {},
"gen_ai.request.top_k": {},
"gen_ai.request.top_p": {},
"gen_ai.response.finish_reasons": [
"end_turn",
],
"gen_ai.response.id": "msg_vrtx_01HqMgFLT1sMDSi7RpSM8tFc",
"gen_ai.response.model": "claude-sonnet-4-20250514",
"gen_ai.system": "google-vertex",
"gen_ai.system_instructions": {},
"gen_ai.usage.cache_audio_read_tokens": {},
"gen_ai.usage.cache_read_tokens": 0,
"gen_ai.usage.cache_write_tokens": 0,
"gen_ai.usage.input_audio_tokens": {},
"gen_ai.usage.input_tokens": 14,
"gen_ai.usage.output_audio_tokens": {},
"gen_ai.usage.output_tokens": 40,
"http.request.body.text": "{"max_tokens":1024,"messages":[{"role":"user","content":"What is the capital of Brazil?"}],"anthropic_version":"vertex-2023-10-16"}",
"http.response.body.text": "{"model":"claude-sonnet-4-20250514","id":"msg_vrtx_01HqMgFLT1sMDSi7RpSM8tFc","type":"message","role":"assistant","content":[{"type":"text","text":"The capital of Brazil is Brasília. It became the country's capital in 1960, replacing Rio de Janeiro, and was specifically built to serve as the new capital city."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":14,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":40}}",
"http.response.status_code": 200,
"logfire.json_schema": "{"type":"object","properties":{"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.system":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.request.top_k":{},"gen_ai.request.top_p":{},"gen_ai.request.temperature":{},"gen_ai.request.stop_sequences":{},"gen_ai.request.seed":{},"gen_ai.response.finish_reasons":{},"gen_ai.response.id":{"type":"string"},"gen_ai.input.messages":{},"gen_ai.output.messages":{},"gen_ai.system_instructions":{},"http.response.status_code":{"type":"number"},"http.request.body.text":{"type":"string"},"http.response.body.text":{"type":"string"},"gen_ai.response.model":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{"type":"number"},"gen_ai.usage.output_tokens":{"type":"number"},"gen_ai.usage.input_audio_tokens":{},"gen_ai.usage.cache_audio_read_tokens":{},"gen_ai.usage.output_audio_tokens":{}}}",
"logfire.level_num": 9,
"logfire.msg": "chat claude-sonnet-4-20250514",
},
"events": [],
"kind": 1,
"links": [],
"name": "chat claude-sonnet-4-20250514",
"parentSpanId": undefined,
"resource": {
"service.name": "PAIG",
"service.version": "test",
},
"scope": "pydantic-ai-gateway",
"status": {
"code": 1,
},
},
]
`;

exports[`google > google-vertex/default > llm 1`] = `
"data: {"candidates": [{"content": {"role": "model","parts": [{"functionCall": {"name": "final_result","args": {"name": "Samuel","city": "London","dob": "1987-01-28"}},"thoughtSignature": "CswDAePx/15lDrSIcIjN85FpyyOl3oASu2R23QD4Z3cj5XRUKOD/3/mMqcHv5AeXc/L+P1eVNpq5C3xM9/8zil6gOOZI91F/r1kmKvxmCECD1SW+p1dtzG6eljHd51vd2Gx7eqtEek1ORzeLP4zSWY2GDlZA9fZIs/uIfhLyOlkiiB1P/GAAUmBPT/TZqdOpQZXBt8MAUrbTOQfbhQ1qbxdrYRveZRMzXS898K6NmjrN5quNiaUgwEbc2J6NAoDOl5jdK8tIt7m25qdpjSYMpAGYD0c0Le2yPf8eO6A9J6zYp1lqVCTifby4/nP5RkVM2e1L4pH6oYisgQsyDUSEDCSG2GXhGO9WU5wFYwkjlhB8ghLU92kVgr/Rq54K/0GaYGBgzKi+YrD8c6QKSTZWTw/46D8lQ1goL1Y1YdtRiRGNFRJqKmSbkxjIOQoIk4glxcsm7L2lucL4miz4yZioBe7HEPDtmQY0FEKM5DQhwuj+AF1bBl5WE/9NYUsuGY3DxSi0lVd2v8+zd0op9U8z3PX4cmdKVkf/kA9TuQSPXBZpYgj7CssoMBT4ssLVoUgetOrNvJL99liSvsPOBZaLDKpNTXQA9GdZvQltoSolOg=="}]},"finishReason": "STOP","avgLogprobs": -0.48033348719278973}],"usageMetadata": {"promptTokenCount": 79,"candidatesTokenCount": 18,"totalTokenCount": 240,"trafficType": "ON_DEMAND","promptTokensDetails": [{"modality": "TEXT","tokenCount": 79}],"candidatesTokensDetails": [{"modality": "TEXT","tokenCount": 18}],"thoughtsTokenCount": 143},"modelVersion": "gemini-2.5-flash","createTime": "2025-11-04T15:31:56.289691Z","responseId": "bBwKaZvXEemDn9kP56mN8QI"}

Expand Down
1 change: 1 addition & 0 deletions gateway/test/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ function testGateway(): TestGateway {
const bodyArray = init?.body as Uint8Array
otelBatch.push(new TextDecoder().decode(bodyArray))
return new Response('OK', { status: 200 })
// This line needs to be disabled when creating the cassettes locally.
} else if (hostname === 'oauth2.googleapis.com') {
// Mock GCP token response for tests
return new Response(JSON.stringify({ access_token: 'mock-gcp-token' }), { status: 200 })
Expand Down
1 change: 1 addition & 0 deletions gateway/test/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class TestKeysDB extends KeysDbD1 {
injectCost: true,
credentials: env.GOOGLE_SERVICE_ACCOUNT_KEY,
apiTypes: ['gemini', 'anthropic'],
profile: 'google-vertex',
},
]
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
interactions:
- request:
body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the capital
of Brazil?"}],"anthropic_version":"vertex-2023-10-16"}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
anthropic-version:
- vertex-2023-10-16
connection:
- keep-alive
content-length:
- '131'
host:
- aiplatform.googleapis.com
user-agent:
- python-httpx/0.28.1
method: POST
uri: https://aiplatform.googleapis.com/v1/projects/pydantic-ai/locations/global/publishers/anthropic/models/claude-sonnet-4:streamRawPredict
response:
body:
string: !!binary |
H4sIAAAAAAAC/2SQTU4DMQyFr2Jlw2aKZlD5myVCFSC6KewQGqUZ01qkzjR2KKXqkTgFFyNT0QrE
ytb7np+StzGL0KI3tXHephYHEphRB8PBSXlyWp5WQ1MYajNfyKx5i/relNXNcjwb3T9WMr5+oPNJ
9zC+0JHLRl132FtRxM4wCzH4XrAiJGpZs+QCK+atftrs/YrvPdmN2jzOEZztSK2H8AJX0X6QB5J+
k69PT/YYbhWm6OwCQXt3SKxxfSSHO2KoLs/KAiJ23jriGUwoQItwZxkphgIst7CyAtKhoxdy1vs1
TBN5BQ0gGN8QMu7zGVeHZEe6Pjbb58KIhq6JaHNj+dXIbaMpsvkBgsuE7PL3OHlfmLRrpN4Y4i5p
o+EVWUxdDXMj1s2xcTlKKXDz11DuecbtfxaS/laG5Xb7DQAA//8DAC9RKlfTAQAA
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Encoding:
- gzip
Content-Type:
- application/json
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-XSS-Protection:
- '0'
date:
- Thu, 06 Nov 2025 12:34:32 GMT
request-id:
- req_vrtx_011CUreHywvBewqNvth7FFMK
server:
- hypercorn-h11
x-vertex-ai-cache-creation-input-tokens:
- '0'
x-vertex-ai-cache-read-input-tokens:
- '0'
x-vertex-ai-input-tokens:
- '14'
x-vertex-ai-output-tokens:
- '40'
x-vertex-ai-received-request-id:
- 65cdc8b3-f380-49b1-953b-85431149ff5c
status:
code: 200
message: OK
version: 1
46 changes: 39 additions & 7 deletions proxy-vcr/proxy_vcr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,21 @@ async def proxy(request: Request) -> Response:
# We should cache based on request body content, so we should make a hash of the request body.
vcr_suffix = request.headers.get('x-vcr-filename', hashlib.sha256(body).hexdigest())

if request.url.path.startswith('/openai'):
provider = select_provider(request)

if provider == 'openai':
client = cast(httpx.AsyncClient, request.scope['state']['httpx_client'])
url = OPENAI_BASE_URL + request.url.path.strip('/openai')
with vcr.use_cassette(cassette_name('openai', vcr_suffix)): # type: ignore[reportUnknownReturnType]
headers = {'Authorization': auth_header, 'content-type': 'application/json'}
response = await client.post(url, content=body, headers=headers)
elif request.url.path.startswith('/groq'):
elif provider == 'groq':
client = cast(httpx.AsyncClient, request.scope['state']['httpx_client'])
url = GROQ_BASE_URL + request.url.path[len('/groq') :]
with vcr.use_cassette(cassette_name('groq', vcr_suffix)): # type: ignore[reportUnknownReturnType]
headers = {'Authorization': auth_header, 'content-type': 'application/json'}
response = await client.post(url, content=body, headers=headers)
elif request.url.path.startswith('/bedrock'):
elif provider == 'bedrock':
client = cast(httpx.AsyncClient, request.scope['state']['httpx_client'])
url = BEDROCK_BASE_URL + request.url.path[len('/bedrock') :]
with vcr.use_cassette(cassette_name('bedrock', vcr_suffix)): # type: ignore[reportUnknownReturnType]
Expand All @@ -69,7 +71,7 @@ async def proxy(request: Request) -> Response:
'x-amz-security-token': auth_header.replace('Bearer ', ''),
}
response = await client.post(url, content=body, headers=headers)
elif request.url.path.startswith('/anthropic'):
elif provider == 'anthropic':
client = cast(httpx.AsyncClient, request.scope['state']['httpx_client'])
url = ANTHROPIC_BASE_URL + request.url.path[len('/anthropic') :]
api_key = request.headers.get('x-api-key', '')
Expand All @@ -85,11 +87,21 @@ async def proxy(request: Request) -> Response:
**anthropic_beta_headers,
}
response = await client.post(url, content=body, headers=headers)
elif request.url.path.startswith('/gemini'):
elif provider == 'google-vertex':
client = cast(httpx.AsyncClient, request.scope['state']['httpx_client'])
url = GOOGLE_BASE_URL + request.url.path[len('/gemini') :] + '?' + request.url.query
headers = {'Authorization': auth_header, 'host': 'aiplatform.googleapis.com'}
url = (
GOOGLE_BASE_URL
+ request.url.path[len('/gemini') :]
+ ('?' + request.url.query if request.url.query else '')
)
print(request.headers.get('anthropic-version'))
headers = {
'Authorization': auth_header,
'host': 'aiplatform.googleapis.com',
'anthropic-version': request.headers.get('anthropic-version', 'vertex-2023-10-16'),
}
# The request fails unless the host header is set explicitly, which seems like odd behavior on Google's side.
print(url)
with vcr.use_cassette(cassette_name('google-vertex', vcr_suffix)): # type: ignore[reportUnknownReturnType]
response = await client.post(url, content=body, headers=headers)
else:
Expand Down Expand Up @@ -131,3 +143,23 @@ async def health_check(_: Request) -> Response:

def cassette_name(provider: str, vcr_suffix: str) -> str:
    """Build the cassette file name for a provider and a VCR suffix.

    Args:
        provider: Provider label used as the file-name prefix.
        vcr_suffix: Suffix appended after the provider, separated by a dash.

    Returns:
        The name in the form ``<provider>-<vcr_suffix>.yaml``.
    """
    return '{}-{}.yaml'.format(provider, vcr_suffix)


def select_provider(request: Request) -> str:
    """Pick the upstream provider label for an incoming proxy request.

    The ``x-vcr-filename`` header is checked first: the value
    ``google-vertex-anthropic-client`` always selects ``google-vertex``,
    regardless of the request path. Otherwise the provider is chosen by the
    first matching path prefix.

    Args:
        request: The incoming request; its headers and URL path are inspected.

    Returns:
        One of ``'openai'``, ``'groq'``, ``'bedrock'``, ``'anthropic'`` or
        ``'google-vertex'``.

    Raises:
        HTTPException: With status 404 when no known path prefix matches.
    """
    if request.headers.get('x-vcr-filename', '') == 'google-vertex-anthropic-client':
        return 'google-vertex'

    path = request.url.path

    # Checked in order; the first prefix that matches wins.
    prefix_to_provider = (
        ('/openai', 'openai'),
        ('/groq', 'groq'),
        ('/bedrock', 'bedrock'),
        ('/anthropic', 'anthropic'),
        ('/gemini', 'google-vertex'),
    )
    for prefix, provider in prefix_to_provider:
        if path.startswith(prefix):
            return provider

    raise HTTPException(status_code=404, detail=f'Path {path} not supported')