Commit 355b3bb

feat(telemetry): updated semantic conventions, added timeToFirstByteMs into spans and metrics (#997)
* feat(telemetry): added timeToFirstByteMs into spans and metrics
* chore(trace): updated semantic conventions with tool mappings
Parent: 7fbc9dc

File tree

9 files changed, +195 -50 lines


src/strands/event_loop/event_loop.py

Lines changed: 1 addition & 1 deletion
@@ -281,7 +281,7 @@ async def _handle_model_execution(
             message = recover_message_on_max_tokens_reached(message)
 
             if model_invoke_span:
-                tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason)
+                tracer.end_model_invoke_span(model_invoke_span, message, usage, metrics, stop_reason)
             break  # Success! Break out of retry loop
 
         except Exception as e:
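
For orientation, the path that metrics takes to reach this call, sketched as comments (helper names come from elsewhere in this commit; the surrounding retry loop is elided):

# How metrics reaches this call (plumbing elided; names from this commit):
#   stream_messages()            records start_time before model.stream(...)
#   process_stream(chunks, ...)  stamps the first content chunk
#   extract_usage_metrics(...)   fills metrics["timeToFirstByteMs"]
#   _handle_model_execution()    then ends the span with it:
tracer.end_model_invoke_span(model_invoke_span, message, usage, metrics, stop_reason)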

src/strands/event_loop/streaming.py

Lines changed: 21 additions & 5 deletions
@@ -2,6 +2,7 @@
 
 import json
 import logging
+import time
 from typing import Any, AsyncGenerator, AsyncIterable, Optional
 
 from ..models.model import Model
@@ -267,31 +268,38 @@ def handle_redact_content(event: RedactContentEvent, state: dict[str, Any]) -> None:
         state["message"]["content"] = [{"text": event["redactAssistantContentMessage"]}]
 
 
-def extract_usage_metrics(event: MetadataEvent) -> tuple[Usage, Metrics]:
+def extract_usage_metrics(event: MetadataEvent, time_to_first_byte_ms: int | None = None) -> tuple[Usage, Metrics]:
     """Extracts usage metrics from the metadata chunk.
 
     Args:
         event: metadata.
+        time_to_first_byte_ms: time to get the first byte from the model in milliseconds
 
     Returns:
         The extracted usage metrics and latency.
     """
     usage = Usage(**event["usage"])
     metrics = Metrics(**event["metrics"])
+    if time_to_first_byte_ms:
+        metrics["timeToFirstByteMs"] = time_to_first_byte_ms
 
     return usage, metrics
 
 
-async def process_stream(chunks: AsyncIterable[StreamEvent]) -> AsyncGenerator[TypedEvent, None]:
+async def process_stream(
+    chunks: AsyncIterable[StreamEvent], start_time: float | None = None
+) -> AsyncGenerator[TypedEvent, None]:
     """Processes the response stream from the API, constructing the final message and extracting usage metrics.
 
     Args:
         chunks: The chunks of the response stream from the model.
+        start_time: Time when the model request is initiated
 
     Yields:
         The reason for stopping, the constructed message, and the usage metrics.
     """
     stop_reason: StopReason = "end_turn"
+    first_byte_time = None
 
     state: dict[str, Any] = {
         "message": {"role": "assistant", "content": []},
@@ -303,10 +311,14 @@ async def process_stream(chunks: AsyncIterable[StreamEvent]) -> AsyncGenerator[TypedEvent, None]:
     state["content"] = state["message"]["content"]
 
     usage: Usage = Usage(inputTokens=0, outputTokens=0, totalTokens=0)
-    metrics: Metrics = Metrics(latencyMs=0)
+    metrics: Metrics = Metrics(latencyMs=0, timeToFirstByteMs=0)
 
     async for chunk in chunks:
+        # Track first byte time when we get first content
+        if first_byte_time is None and ("contentBlockDelta" in chunk or "contentBlockStart" in chunk):
+            first_byte_time = time.time()
         yield ModelStreamChunkEvent(chunk=chunk)
+
         if "messageStart" in chunk:
             state["message"] = handle_message_start(chunk["messageStart"], state["message"])
         elif "contentBlockStart" in chunk:
@@ -319,7 +331,10 @@ async def process_stream(chunks: AsyncIterable[StreamEvent]) -> AsyncGenerator[TypedEvent, None]:
         elif "messageStop" in chunk:
             stop_reason = handle_message_stop(chunk["messageStop"])
         elif "metadata" in chunk:
-            usage, metrics = extract_usage_metrics(chunk["metadata"])
+            time_to_first_byte_ms = (
+                int(1000 * (first_byte_time - start_time)) if (start_time and first_byte_time) else None
+            )
+            usage, metrics = extract_usage_metrics(chunk["metadata"], time_to_first_byte_ms)
         elif "redactContent" in chunk:
             handle_redact_content(chunk["redactContent"], state)
 
@@ -346,7 +361,8 @@ async def stream_messages(
     logger.debug("model=<%s> | streaming messages", model)
 
     messages = remove_blank_messages_content_text(messages)
+    start_time = time.time()
     chunks = model.stream(messages, tool_specs if tool_specs else None, system_prompt)
 
-    async for event in process_stream(chunks):
+    async for event in process_stream(chunks, start_time):
         yield event
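
Isolated from the event loop, the timing logic looks like this. A runnable sketch: fake_stream and its chunk payloads are illustrative stand-ins for model.stream(...), not code from this commit.

import asyncio
import time
from typing import Any, AsyncGenerator


async def fake_stream() -> AsyncGenerator[dict[str, Any], None]:
    """Stand-in for a model stream: a pause, then content chunks, then metadata."""
    await asyncio.sleep(0.05)  # simulated wait before the first byte
    yield {"contentBlockStart": {"start": {}}}
    yield {"contentBlockDelta": {"delta": {"text": "hello"}}}
    yield {"metadata": {"usage": {}, "metrics": {"latencyMs": 60}}}


async def main() -> None:
    start_time = time.time()  # taken before the request, as in stream_messages()
    first_byte_time = None
    async for chunk in fake_stream():
        # Same condition process_stream uses to detect the first content chunk
        if first_byte_time is None and ("contentBlockDelta" in chunk or "contentBlockStart" in chunk):
            first_byte_time = time.time()
    ttfb_ms = int(1000 * (first_byte_time - start_time)) if first_byte_time else None
    print(f"timeToFirstByteMs={ttfb_ms}")  # roughly 50 on an idle machine


asyncio.run(main())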

src/strands/telemetry/metrics.py

Lines changed: 6 additions & 1 deletion
@@ -286,6 +286,8 @@ def update_metrics(self, metrics: Metrics) -> None:
             metrics: The metrics data to add to the accumulated totals.
         """
         self._metrics_client.event_loop_latency.record(metrics["latencyMs"])
+        if metrics.get("timeToFirstByteMs") is not None:
+            self._metrics_client.model_time_to_first_token.record(metrics["timeToFirstByteMs"])
         self.accumulated_metrics["latencyMs"] += metrics["latencyMs"]
 
     def get_summary(self) -> Dict[str, Any]:
@@ -448,7 +450,7 @@ class MetricsClient:
     event_loop_output_tokens: Histogram
     event_loop_cache_read_input_tokens: Histogram
     event_loop_cache_write_input_tokens: Histogram
-
+    model_time_to_first_token: Histogram
     tool_call_count: Counter
     tool_success_count: Counter
     tool_error_count: Counter
@@ -507,3 +509,6 @@ def create_instruments(self) -> None:
         self.event_loop_cache_write_input_tokens = self.meter.create_histogram(
             name=constants.STRANDS_EVENT_LOOP_CACHE_WRITE_INPUT_TOKENS, unit="token"
         )
+        self.model_time_to_first_token = self.meter.create_histogram(
+            name=constants.STRANDS_MODEL_TIME_TO_FIRST_TOKEN, unit="ms"
+        )
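
The new instrument goes through the standard OpenTelemetry metrics API. A minimal sketch of the equivalent wiring outside MetricsClient (the meter name and recorded value are illustrative):

from opentelemetry import metrics

# Create the histogram once (as create_instruments() does),
# then record one value per model invocation.
meter = metrics.get_meter("strands-example")  # illustrative meter name
model_time_to_first_token = meter.create_histogram(
    name="strands.model.time_to_first_token", unit="ms"
)
model_time_to_first_token.record(182)  # one call's timeToFirstByteMs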

src/strands/telemetry/metrics_constants.py

Lines changed: 1 addition & 0 deletions
@@ -15,3 +15,4 @@
 STRANDS_EVENT_LOOP_OUTPUT_TOKENS = "strands.event_loop.output.tokens"
 STRANDS_EVENT_LOOP_CACHE_READ_INPUT_TOKENS = "strands.event_loop.cache_read.input.tokens"
 STRANDS_EVENT_LOOP_CACHE_WRITE_INPUT_TOKENS = "strands.event_loop.cache_write.input.tokens"
+STRANDS_MODEL_TIME_TO_FIRST_TOKEN = "strands.model.time_to_first_token"

src/strands/telemetry/tracer.py

Lines changed: 81 additions & 12 deletions
@@ -16,7 +16,7 @@
 
 from ..agent.agent_result import AgentResult
 from ..types.content import ContentBlock, Message, Messages
-from ..types.streaming import StopReason, Usage
+from ..types.streaming import Metrics, StopReason, Usage
 from ..types.tools import ToolResult, ToolUse
 from ..types.traces import Attributes, AttributeValue
 
@@ -153,6 +153,28 @@ def _set_attributes(self, span: Span, attributes: Dict[str, AttributeValue]) -> None:
         for key, value in attributes.items():
             span.set_attribute(key, value)
 
+    def _add_optional_usage_and_metrics_attributes(
+        self, attributes: Dict[str, AttributeValue], usage: Usage, metrics: Metrics
+    ) -> None:
+        """Add optional usage and metrics attributes if they have values.
+
+        Args:
+            attributes: Dictionary to add attributes to
+            usage: Token usage information from the model call
+            metrics: Metrics from the model call
+        """
+        if "cacheReadInputTokens" in usage:
+            attributes["gen_ai.usage.cache_read_input_tokens"] = usage["cacheReadInputTokens"]
+
+        if "cacheWriteInputTokens" in usage:
+            attributes["gen_ai.usage.cache_write_input_tokens"] = usage["cacheWriteInputTokens"]
+
+        if metrics.get("timeToFirstByteMs", 0) > 0:
+            attributes["gen_ai.server.time_to_first_token"] = metrics["timeToFirstByteMs"]
+
+        if metrics.get("latencyMs", 0) > 0:
+            attributes["gen_ai.server.request.duration"] = metrics["latencyMs"]
+
     def _end_span(
         self,
         span: Span,
@@ -277,14 +299,21 @@ def start_model_invoke_span(
         return span
 
     def end_model_invoke_span(
-        self, span: Span, message: Message, usage: Usage, stop_reason: StopReason, error: Optional[Exception] = None
+        self,
+        span: Span,
+        message: Message,
+        usage: Usage,
+        metrics: Metrics,
+        stop_reason: StopReason,
+        error: Optional[Exception] = None,
     ) -> None:
         """End a model invocation span with results and metrics.
 
         Args:
             span: The span to end.
             message: The message response from the model.
             usage: Token usage information from the model call.
+            metrics: Metrics from the model call.
             stop_reason (StopReason): The reason the model stopped generating.
             error: Optional exception if the model call failed.
         """
@@ -294,10 +323,11 @@
             "gen_ai.usage.completion_tokens": usage["outputTokens"],
             "gen_ai.usage.output_tokens": usage["outputTokens"],
             "gen_ai.usage.total_tokens": usage["totalTokens"],
-            "gen_ai.usage.cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
-            "gen_ai.usage.cache_write_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
 
+        # Add optional attributes if they have values
+        self._add_optional_usage_and_metrics_attributes(attributes, usage, metrics)
+
         if self.use_latest_genai_conventions:
             self._add_event(
                 span,
@@ -307,7 +337,7 @@
                     [
                         {
                             "role": message["role"],
-                            "parts": [{"type": "text", "content": message["content"]}],
+                            "parts": self._map_content_blocks_to_otel_parts(message["content"]),
                             "finish_reason": str(stop_reason),
                         }
                     ]
@@ -362,7 +392,7 @@ def start_tool_call_span(self, tool: ToolUse, parent_span: Optional[Span] = None
                             "type": "tool_call",
                             "name": tool["name"],
                             "id": tool["toolUseId"],
-                            "arguments": [{"content": tool["input"]}],
+                            "arguments": tool["input"],
                         }
                     ],
                 }
@@ -417,7 +447,7 @@ def end_tool_call_span(
                         {
                             "type": "tool_call_response",
                             "id": tool_result.get("toolUseId", ""),
-                            "result": tool_result.get("content"),
+                            "response": tool_result.get("content"),
                         }
                     ],
                 }
@@ -504,7 +534,7 @@ def end_event_loop_cycle_span(
                 [
                     {
                         "role": tool_result_message["role"],
-                        "parts": [{"type": "text", "content": tool_result_message["content"]}],
+                        "parts": self._map_content_blocks_to_otel_parts(tool_result_message["content"]),
                     }
                 ]
             )
@@ -634,19 +664,23 @@ def start_multiagent_span(
         )
 
         span = self._start_span(operation, attributes=attributes, span_kind=trace_api.SpanKind.CLIENT)
-        content = serialize(task) if isinstance(task, list) else task
 
         if self.use_latest_genai_conventions:
+            parts: list[dict[str, Any]] = []
+            if isinstance(task, list):
+                parts = self._map_content_blocks_to_otel_parts(task)
+            else:
+                parts = [{"type": "text", "content": task}]
             self._add_event(
                 span,
                 "gen_ai.client.inference.operation.details",
-                {"gen_ai.input.messages": serialize([{"role": "user", "parts": [{"type": "text", "content": task}]}])},
+                {"gen_ai.input.messages": serialize([{"role": "user", "parts": parts}])},
             )
         else:
             self._add_event(
                 span,
                 "gen_ai.user.message",
-                event_attributes={"content": content},
+                event_attributes={"content": serialize(task) if isinstance(task, list) else task},
             )
 
         return span
@@ -718,7 +752,7 @@ def _add_event_messages(self, span: Span, messages: Messages) -> None:
         input_messages: list = []
         for message in messages:
             input_messages.append(
-                {"role": message["role"], "parts": [{"type": "text", "content": message["content"]}]}
+                {"role": message["role"], "parts": self._map_content_blocks_to_otel_parts(message["content"])}
             )
         self._add_event(
             span, "gen_ai.client.inference.operation.details", {"gen_ai.input.messages": serialize(input_messages)}
@@ -731,6 +765,41 @@ def _add_event_messages(self, span: Span, messages: Messages) -> None:
                 {"content": serialize(message["content"])},
             )
 
+    def _map_content_blocks_to_otel_parts(self, content_blocks: list[ContentBlock]) -> list[dict[str, Any]]:
+        """Map ContentBlock objects to OpenTelemetry parts format."""
+        parts: list[dict[str, Any]] = []
+
+        for block in content_blocks:
+            if "text" in block:
+                # Standard TextPart
+                parts.append({"type": "text", "content": block["text"]})
+            elif "toolUse" in block:
+                # Standard ToolCallRequestPart
+                tool_use = block["toolUse"]
+                parts.append(
+                    {
+                        "type": "tool_call",
+                        "name": tool_use["name"],
+                        "id": tool_use["toolUseId"],
+                        "arguments": tool_use["input"],
+                    }
+                )
+            elif "toolResult" in block:
+                # Standard ToolCallResponsePart
+                tool_result = block["toolResult"]
+                parts.append(
+                    {
+                        "type": "tool_call_response",
+                        "id": tool_result["toolUseId"],
+                        "response": tool_result["content"],
+                    }
+                )
+            else:
+                # For all other ContentBlock types, use the key as type and value as content
+                for key, value in block.items():
+                    parts.append({"type": key, "content": value})
+        return parts
+
 
 # Singleton instance for global access
 _tracer_instance = None
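
To make the new mapping concrete, here is a self-contained restatement of _map_content_blocks_to_otel_parts with an invented sample input; the simplified function mirrors the method's logic but is not the production code:

from typing import Any


def map_blocks(blocks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Simplified mirror of Tracer._map_content_blocks_to_otel_parts."""
    parts: list[dict[str, Any]] = []
    for block in blocks:
        if "text" in block:
            parts.append({"type": "text", "content": block["text"]})
        elif "toolUse" in block:
            tu = block["toolUse"]
            parts.append({"type": "tool_call", "name": tu["name"], "id": tu["toolUseId"], "arguments": tu["input"]})
        elif "toolResult" in block:
            tr = block["toolResult"]
            parts.append({"type": "tool_call_response", "id": tr["toolUseId"], "response": tr["content"]})
        else:
            # Fallback: key becomes the part type, value the content
            for key, value in block.items():
                parts.append({"type": key, "content": value})
    return parts


print(map_blocks([
    {"text": "Checking the weather"},
    {"toolUse": {"name": "get_weather", "toolUseId": "t1", "input": {"city": "Berlin"}}},
]))
# [{'type': 'text', 'content': 'Checking the weather'},
#  {'type': 'tool_call', 'name': 'get_weather', 'id': 't1', 'arguments': {'city': 'Berlin'}}]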

src/strands/types/event_loop.py

Lines changed: 5 additions & 2 deletions
@@ -23,14 +23,17 @@ class Usage(TypedDict, total=False):
     cacheWriteInputTokens: int
 
 
-class Metrics(TypedDict):
+class Metrics(TypedDict, total=False):
     """Performance metrics for model interactions.
 
     Attributes:
         latencyMs (int): Latency of the model request in milliseconds.
+        timeToFirstByteMs (int): Latency from sending model request to first
+            content chunk (contentBlockDelta or contentBlockStart) from the model in milliseconds.
     """
 
-    latencyMs: int
+    latencyMs: Required[int]
+    timeToFirstByteMs: int
 
 
 StopReason = Literal[
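
The total=False plus Required combination keeps latencyMs mandatory while leaving the new field optional. A quick sketch (Required lives in typing on Python 3.11+ and in typing_extensions before that; the module's actual import is outside this hunk):

from typing_extensions import Required, TypedDict


class Metrics(TypedDict, total=False):
    latencyMs: Required[int]  # still required despite total=False
    timeToFirstByteMs: int    # may be omitted


ok: Metrics = {"latencyMs": 120}                             # valid: TTFB omitted
full: Metrics = {"latencyMs": 120, "timeToFirstByteMs": 45}  # valid: both present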

tests/strands/event_loop/test_streaming.py

Lines changed: 2 additions & 2 deletions
@@ -491,7 +491,7 @@ def test_extract_usage_metrics_with_cache_tokens():
                 "content": [],
             },
             {"inputTokens": 0, "outputTokens": 0, "totalTokens": 0},
-            {"latencyMs": 0},
+            {"latencyMs": 0, "timeToFirstByteMs": 0},
         ),
     },
 ],
@@ -781,7 +781,7 @@ async def test_stream_messages(agenerator, alist):
             "end_turn",
             {"role": "assistant", "content": [{"text": "test"}]},
             {"inputTokens": 0, "outputTokens": 0, "totalTokens": 0},
-            {"latencyMs": 0},
+            {"latencyMs": 0, "timeToFirstByteMs": 0},
         )
     },
 ]
