Skip to content

Commit 8c2093c

Browse files
authored
Improve extraction (#107)
1 parent ee6394a commit 8c2093c

File tree

39 files changed

+1020
-1298
lines changed

39 files changed

+1020
-1298
lines changed

deploy/test/index.spec.ts.snap

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ exports[`deploy > should call openai via gateway > llm 1`] = `
1414
},
1515
},
1616
],
17-
"created": 1758119078,
18-
"id": "chatcmpl-CGnNGUOMdokZ0Ph0QUfUsK19MdByR",
17+
"created": 1761828474,
18+
"id": "chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M",
1919
"model": "gpt-5-2025-08-07",
2020
"object": "chat.completion",
2121
"service_tier": "default",
@@ -118,7 +118,7 @@ exports[`deploy > should call openai via gateway > span 1`] = `
118118
{
119119
"key": "gen_ai.response.id",
120120
"value": {
121-
"stringValue": "chatcmpl-CGnNGUOMdokZ0Ph0QUfUsK19MdByR",
121+
"stringValue": "chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M",
122122
},
123123
},
124124
{
@@ -293,7 +293,7 @@ exports[`deploy > should call openai via gateway > span 1`] = `
293293
{
294294
"key": "http.response.body.text",
295295
"value": {
296-
"stringValue": "{"id":"chatcmpl-CGnNGUOMdokZ0Ph0QUfUsK19MdByR","object":"chat.completion","created":1758119078,"model":"gpt-5-2025-08-07","choices":[{"index":0,"message":{"role":"assistant","content":"Paris.","refusal":null,"annotations":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":23,"completion_tokens":75,"total_tokens":98,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":64,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0},"pydantic_ai_gateway":{"cost_estimate":0.00077875}},"service_tier":"default","system_fingerprint":null}",
296+
"stringValue": "{"id":"chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M","object":"chat.completion","created":1761828474,"model":"gpt-5-2025-08-07","choices":[{"index":0,"message":{"role":"assistant","content":"Paris.","refusal":null,"annotations":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":23,"completion_tokens":75,"total_tokens":98,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":64,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0},"pydantic_ai_gateway":{"cost_estimate":0.00077875}},"service_tier":"default","system_fingerprint":null}",
297297
},
298298
},
299299
{

examples/pai_openai_stream.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,41 @@
11
import asyncio
2+
from datetime import date
23

34
import logfire
5+
from pydantic import BaseModel, field_validator
46
from pydantic_ai import Agent
57

6-
logfire.configure(service_name='testing', send_to_logfire=False)
8+
logfire.configure(service_name='testing')
79
logfire.instrument_pydantic_ai()
810
logfire.instrument_httpx(capture_all=True)
911

12+
13+
class Person(BaseModel, use_attribute_docstrings=True):
14+
name: str
15+
"""The name of the person."""
16+
dob: date
17+
"""The date of birth of the person. MUST BE A VALID ISO 8601 date."""
18+
city: str
19+
"""The city where the person lives."""
20+
21+
@field_validator('dob')
22+
def validate_dob(cls, v: date) -> date:
23+
if v >= date(1900, 1, 1):
24+
raise ValueError('The person must be born in the 19th century')
25+
return v
26+
27+
1028
person_agent = Agent(
11-
'gateway:openai/gpt-5', instructions='You are a helpful assistant.', model_settings={'max_tokens': 1024}, retries=0
29+
'gateway:openai/gpt-5',
30+
instructions='You are a helpful assistant.',
31+
model_settings={'max_tokens': 1024},
32+
retries=2,
33+
output_type=Person,
1234
)
1335

1436

1537
async def main():
16-
async for event in person_agent.run_stream_events('What is the capital of France?'):
38+
async for event in person_agent.run_stream_events("Samuel lived in London and was born on Jan 28th '87"):
1739
print(repr(event))
1840

1941

gateway/src/api/anthropic.ts

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@ import type {
22
BetaContentBlock,
33
BetaContentBlockParam,
44
BetaMessage,
5+
BetaRawMessageStreamEvent,
56
MessageCreateParams,
67
} from '@anthropic-ai/sdk/resources/beta'
78
import type { InputMessages, JsonValue, MessagePart, OutputMessages, TextPart } from '../otel/genai'
8-
import { BaseAPI } from './base'
9+
import { BaseAPI, type ExtractedRequest, type ExtractedResponse, type ExtractorConfig } from './base'
910

10-
// TODO(Marcelo): We use the beta API in PydanticAI, but does it matter here?
11-
12-
export class AnthropicAPI extends BaseAPI<MessageCreateParams, BetaMessage> {
11+
export class AnthropicAPI extends BaseAPI<MessageCreateParams, BetaMessage, BetaRawMessageStreamEvent> {
1312
requestStopSequences = (requestBody: MessageCreateParams): string[] | undefined => requestBody.stop_sequences
1413
requestTemperature = (requestBody: MessageCreateParams): number | undefined => requestBody.temperature
1514
requestTopK = (requestBody: MessageCreateParams): number | undefined => requestBody.top_k
@@ -51,6 +50,53 @@ export class AnthropicAPI extends BaseAPI<MessageCreateParams, BetaMessage> {
5150
},
5251
]
5352
}
53+
54+
// SafeExtractor implementation
55+
56+
requestExtractors: ExtractorConfig<MessageCreateParams, ExtractedRequest> = {
57+
requestModel: (requestBody: MessageCreateParams) => {
58+
this.extractedRequest.requestModel = requestBody.model ?? undefined
59+
},
60+
maxTokens: (requestBody: MessageCreateParams) => {
61+
this.extractedRequest.maxTokens = requestBody.max_tokens ?? undefined
62+
},
63+
temperature: (requestBody: MessageCreateParams) => {
64+
this.extractedRequest.temperature = requestBody.temperature ?? undefined
65+
},
66+
topK: (requestBody: MessageCreateParams) => {
67+
this.extractedRequest.topK = requestBody.top_k ?? undefined
68+
},
69+
topP: (requestBody: MessageCreateParams) => {
70+
this.extractedRequest.topP = requestBody.top_p ?? undefined
71+
},
72+
stopSequences: (requestBody: MessageCreateParams) => {
73+
this.extractedRequest.stopSequences = requestBody.stop_sequences ?? undefined
74+
},
75+
systemInstructions: (requestBody: MessageCreateParams) => {
76+
this.extractedRequest.systemInstructions = this.systemInstructions(requestBody)
77+
},
78+
}
79+
80+
chunkExtractors: ExtractorConfig<BetaRawMessageStreamEvent, ExtractedResponse> = {
81+
usage: (chunk: BetaRawMessageStreamEvent) => {
82+
if ('usage' in chunk && chunk.usage) {
83+
this.extractedResponse.usage = this.extractUsage(chunk)
84+
}
85+
},
86+
responseModel: (chunk: BetaRawMessageStreamEvent) => {
87+
if (chunk.type === 'message_start') {
88+
this.extractedResponse.responseModel = chunk.message.model
89+
}
90+
},
91+
responseId: (chunk: BetaRawMessageStreamEvent) => {
92+
if (chunk.type === 'message_start') {
93+
this.extractedResponse.responseId = chunk.message.id
94+
}
95+
},
96+
finishReasons: (_chunk: BetaRawMessageStreamEvent) => {},
97+
// TODO(Marcelo): We should implement this one.
98+
outputMessages: (_chunk: BetaRawMessageStreamEvent) => {},
99+
}
54100
}
55101

56102
function mapParts(content: string | BetaContentBlockParam[] | BetaContentBlock[]): MessagePart[] {

gateway/src/api/base.ts

Lines changed: 107 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,124 @@ import type { InputMessages, OutputMessages, TextPart } from '../otel/genai'
44
import { type JsonData, safe } from '../providers/default'
55
import type { ProviderID } from '../types'
66

7-
export abstract class BaseAPI<RequestBody, ResponseBody>
8-
implements GenAIAttributesExtractor<RequestBody, ResponseBody>
7+
export interface ExtractedRequest {
8+
requestModel?: string
9+
temperature?: number
10+
maxTokens?: number
11+
systemInstructions?: TextPart[]
12+
topP?: number
13+
topK?: number
14+
stopSequences?: string[]
15+
seed?: number
16+
inputMessages?: InputMessages
17+
}
18+
19+
export interface ExtractedResponse {
20+
responseModel: string
21+
responseId: string
22+
finishReasons: string[]
23+
outputMessages: OutputMessages
24+
usage: Usage
25+
}
26+
27+
export type FieldExtractor<Data> = (data: Data) => void
28+
29+
export type ExtractorConfig<Data, Target> = {
30+
[K in keyof Target]?: FieldExtractor<Data>
31+
}
32+
33+
export type ExtractedData = ExtractedRequest & ExtractedResponse
34+
35+
export interface SafeExtractor<RequestBody, ResponseBody, StreamChunk> {
36+
extractedRequest: ExtractedRequest
37+
extractedResponse: Partial<ExtractedResponse>
38+
39+
processRequest(request: RequestBody): void
40+
requestExtractors: ExtractorConfig<RequestBody, ExtractedRequest>
41+
42+
processResponse(response: ResponseBody): void
43+
44+
processChunk(chunk: StreamChunk): void
45+
chunkExtractors: ExtractorConfig<StreamChunk, ExtractedResponse>
46+
}
47+
48+
export abstract class BaseAPI<RequestBody, ResponseBody, StreamChunk = JsonData>
49+
implements GenAIAttributesExtractor<RequestBody, ResponseBody>, SafeExtractor<RequestBody, ResponseBody, StreamChunk>
950
{
1051
/** @apiFlavor: the flavor of the API, used to determine the response model and usage */
1152
apiFlavor: string | undefined = undefined
1253

1354
readonly providerId: ProviderID
1455
readonly requestModel?: string
1556

57+
extractedRequest: ExtractedRequest = {}
58+
extractedResponse: Partial<ExtractedResponse> = {}
59+
1660
constructor(providerId: ProviderID, requestModel?: string) {
1761
this.providerId = providerId
1862
this.requestModel = requestModel
1963
}
2064

21-
// TODO(Marcelo): This is not used anywhere yet! We should remove this note when we use it.
22-
extractUsage(responseBody: ResponseBody): Usage | undefined {
23-
const provider = findProvider({ providerId: this.providerId })
24-
if (!provider) {
25-
// This should never happen, but we will throw an error to be safe.
26-
throw new Error(`Provider not found for provider ID: ${this.providerId}`)
65+
requestExtractors: ExtractorConfig<RequestBody, ExtractedRequest> = {}
66+
chunkExtractors: ExtractorConfig<StreamChunk, ExtractedResponse> = {}
67+
68+
processRequest(request: RequestBody): void {
69+
for (const extractor of Object.values(this.requestExtractors)) {
70+
safe(extractor)(request)
71+
}
72+
}
73+
74+
processResponse(_response: ResponseBody): void {
75+
throw new Error('Method not implemented.')
76+
}
77+
78+
// This runs in O(K * N) time, where K is the number of chunkExtractors and N is the number of chunks.
79+
// Although this seems inefficient, K is a constant and N is typically small.
80+
// We do this because we want to ensure that we extract each field separately, so the logic of one of the extractors
81+
// doesn't cause another one to fail.
82+
processChunk(chunk: StreamChunk): void {
83+
for (const extractor of Object.values(this.chunkExtractors)) {
84+
safe(extractor)(chunk)
2785
}
86+
}
87+
88+
extractUsage(responseBody: ResponseBody | StreamChunk): Usage | undefined {
89+
const provider = findProvider({ providerId: this.providerId })
90+
// This should never happen because we know the provider ID is valid, but we will throw an error to be safe.
91+
if (!provider) throw new Error(`Provider not found for provider ID: ${this.providerId}`)
2892
const { usage } = extractUsage(provider, responseBody, this.apiFlavor)
2993
return usage
3094
}
3195

96+
toGenAiOtelAttributes(): GenAIAttributes {
97+
return omitUndefined({
98+
'gen_ai.system': this.providerId,
99+
'gen_ai.operation.name': 'chat',
100+
// Request Attributes
101+
'gen_ai.request.model': this.extractedRequest?.requestModel,
102+
'gen_ai.request.max_tokens': this.extractedRequest?.maxTokens,
103+
'gen_ai.request.temperature': this.extractedRequest?.temperature,
104+
'gen_ai.request.top_p': this.extractedRequest?.topP,
105+
'gen_ai.request.top_k': this.extractedRequest?.topK,
106+
'gen_ai.request.stop_sequences': this.extractedRequest?.stopSequences,
107+
'gen_ai.request.seed': this.extractedRequest?.seed,
108+
'gen_ai.system_instructions': this.extractedRequest?.systemInstructions,
109+
'gen_ai.input.messages': this.extractedRequest?.inputMessages,
110+
// Response Attributes
111+
'gen_ai.response.model': this.extractedResponse?.responseModel,
112+
'gen_ai.response.id': this.extractedResponse?.responseId,
113+
'gen_ai.response.finish_reasons': this.extractedResponse?.finishReasons,
114+
'gen_ai.output.messages': this.extractedResponse?.outputMessages,
115+
'gen_ai.usage.input_tokens': this.extractedResponse?.usage?.input_tokens,
116+
'gen_ai.usage.cache_read_tokens': this.extractedResponse?.usage?.cache_read_tokens,
117+
'gen_ai.usage.cache_write_tokens': this.extractedResponse?.usage?.cache_write_tokens,
118+
'gen_ai.usage.output_tokens': this.extractedResponse?.usage?.output_tokens,
119+
'gen_ai.usage.input_audio_tokens': this.extractedResponse?.usage?.input_audio_tokens,
120+
'gen_ai.usage.cache_audio_read_tokens': this.extractedResponse?.usage?.cache_audio_read_tokens,
121+
'gen_ai.usage.output_audio_tokens': this.extractedResponse?.usage?.output_audio_tokens,
122+
})
123+
}
124+
32125
// GenAIAttributesExtractor implementation
33126

34127
requestMaxTokens?: (requestBody: RequestBody) => number | undefined
@@ -45,6 +138,8 @@ export abstract class BaseAPI<RequestBody, ResponseBody>
45138

46139
extractOtelAttributes(requestBody: JsonData, responseBody: JsonData): GenAIAttributes {
47140
return {
141+
'gen_ai.system': this.providerId,
142+
'gen_ai.operation.name': 'chat',
48143
'gen_ai.request.max_tokens': this.genAIAttributes('requestMaxTokens', requestBody as RequestBody),
49144
'gen_ai.request.top_k': this.genAIAttributes('requestTopK', requestBody as RequestBody),
50145
'gen_ai.request.top_p': this.genAIAttributes('requestTopP', requestBody as RequestBody),
@@ -72,3 +167,7 @@ export abstract class BaseAPI<RequestBody, ResponseBody>
72167
return undefined
73168
}
74169
}
170+
171+
function omitUndefined<T extends Record<string, unknown>>(obj: T): Partial<T> {
172+
return Object.fromEntries(Object.entries(obj).filter(([_, v]) => v !== undefined)) as Partial<T>
173+
}

gateway/src/api/chat.ts

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
/** This module implements the OpenAI Chat Completion API.
22
* @see https://platform.openai.com/docs/api-reference/chat
33
*/
4+
45
import * as logfire from '@pydantic/logfire-api'
56
import mime from 'mime-types'
67
import type {
78
ChatCompletion,
9+
ChatCompletionChunk,
810
ChatCompletionCreateParams,
911
ChatCompletionMessageParam,
1012
} from 'openai/resources/chat/completions'
1113
import type { ChatMessage, InputMessages, MessagePart, OutputMessage, OutputMessages } from '../otel/genai'
12-
import { BaseAPI } from './base'
14+
import { BaseAPI, type ExtractedRequest, type ExtractedResponse, type ExtractorConfig } from './base'
1315

14-
export class ChatCompletionAPI extends BaseAPI<ChatCompletionCreateParams, ChatCompletion> {
16+
export class ChatCompletionAPI extends BaseAPI<ChatCompletionCreateParams, ChatCompletion, ChatCompletionChunk> {
1517
apiFlavor = 'chat'
1618

1719
requestStopSequences = (requestBody: ChatCompletionCreateParams): string[] | undefined => {
@@ -41,6 +43,56 @@ export class ChatCompletionAPI extends BaseAPI<ChatCompletionCreateParams, ChatC
4143
outputMessages = (responseBody: ChatCompletion): OutputMessages | undefined => {
4244
return responseBody.choices.map(mapOutputMessage)
4345
}
46+
47+
// SafeExtractor implementation
48+
49+
requestExtractors: ExtractorConfig<ChatCompletionCreateParams, ExtractedRequest> = {
50+
requestModel: (requestBody: ChatCompletionCreateParams) => {
51+
this.extractedRequest.requestModel = requestBody.model ?? undefined
52+
},
53+
maxTokens: (requestBody: ChatCompletionCreateParams) => {
54+
this.extractedRequest.maxTokens = requestBody.max_completion_tokens ?? undefined
55+
},
56+
temperature: (requestBody: ChatCompletionCreateParams) => {
57+
this.extractedRequest.temperature = requestBody.temperature ?? undefined
58+
},
59+
topP: (requestBody: ChatCompletionCreateParams) => {
60+
this.extractedRequest.topP = requestBody.top_p ?? undefined
61+
},
62+
stopSequences: (requestBody: ChatCompletionCreateParams) => {
63+
this.extractedRequest.stopSequences =
64+
typeof requestBody.stop === 'string' ? [requestBody.stop] : (requestBody.stop ?? undefined)
65+
},
66+
}
67+
68+
chunkExtractors: ExtractorConfig<ChatCompletionChunk, ExtractedResponse> = {
69+
usage: (chunk: ChatCompletionChunk) => {
70+
if ('usage' in chunk && chunk.usage) {
71+
this.extractedResponse.usage = this.extractUsage(chunk)
72+
}
73+
},
74+
responseModel: (chunk: ChatCompletionChunk) => {
75+
if ('model' in chunk && chunk.model) {
76+
this.extractedResponse.responseModel = chunk.model
77+
}
78+
},
79+
responseId: (chunk: ChatCompletionChunk) => {
80+
if ('id' in chunk && chunk.id) {
81+
this.extractedResponse.responseId = chunk.id
82+
}
83+
},
84+
finishReasons: (chunk: ChatCompletionChunk) => {
85+
const finishReasons: string[] = []
86+
for (const choice of chunk.choices) {
87+
if (choice.finish_reason) {
88+
finishReasons.push(choice.finish_reason)
89+
}
90+
}
91+
this.extractedResponse.finishReasons = finishReasons.length > 0 ? finishReasons : undefined
92+
},
93+
// TODO(Marcelo): We should implement this one.
94+
outputMessages: (_chunk: ChatCompletionChunk) => {},
95+
}
4496
}
4597

4698
export function mapInputMessage(message: ChatCompletionMessageParam): ChatMessage {

0 commit comments

Comments
 (0)