Commit a2d9f13

seanzhougoogle authored and copybara-github committed
chore: Add span for context caching handling and new cache creation
PiperOrigin-RevId: 820852233
1 parent 0df6759 commit a2d9f13

File tree

2 files changed: +65 −48 lines changed

src/google/adk/models/gemini_context_cache_manager.py

Lines changed: 55 additions & 46 deletions
```diff
@@ -353,57 +353,66 @@ async def _create_gemini_cache(
     Returns:
       Cache metadata with precise creation timestamp
     """
-    # Prepare cache contents (first N contents + system instruction + tools)
-    cache_contents = llm_request.contents[:cache_contents_count]
-
-    cache_config = types.CreateCachedContentConfig(
-        contents=cache_contents,
-        ttl=llm_request.cache_config.ttl_string,
-        display_name=(
-            f"adk-cache-{int(time.time())}-{cache_contents_count}contents"
-        ),
-    )
-
-    # Add system instruction if present
-    if llm_request.config and llm_request.config.system_instruction:
-      cache_config.system_instruction = llm_request.config.system_instruction
-      logger.debug(
-          "Added system instruction to cache config (length=%d)",
-          len(llm_request.config.system_instruction),
+    from ..telemetry.tracing import tracer
+
+    with tracer.start_as_current_span("create_cache") as span:
+      # Prepare cache contents (first N contents + system instruction + tools)
+      cache_contents = llm_request.contents[:cache_contents_count]
+
+      cache_config = types.CreateCachedContentConfig(
+          contents=cache_contents,
+          ttl=llm_request.cache_config.ttl_string,
+          display_name=(
+              f"adk-cache-{int(time.time())}-{cache_contents_count}contents"
+          ),
       )
 
-    # Add tools if present
-    if llm_request.config and llm_request.config.tools:
-      cache_config.tools = llm_request.config.tools
+      # Add system instruction if present
+      if llm_request.config and llm_request.config.system_instruction:
+        cache_config.system_instruction = llm_request.config.system_instruction
+        logger.debug(
+            "Added system instruction to cache config (length=%d)",
+            len(llm_request.config.system_instruction),
+        )
 
-    # Add tool config if present
-    if llm_request.config and llm_request.config.tool_config:
-      cache_config.tool_config = llm_request.config.tool_config
+      # Add tools if present
+      if llm_request.config and llm_request.config.tools:
+        cache_config.tools = llm_request.config.tools
 
-    logger.debug(
-        "Creating cache with model %s and config: %s",
-        llm_request.model,
-        cache_config,
-    )
-    cached_content = await self.genai_client.aio.caches.create(
-        model=llm_request.model,
-        config=cache_config,
-    )
-    # Set precise creation timestamp right after cache creation
-    created_at = time.time()
-    logger.info("Cache created successfully: %s", cached_content.name)
+      # Add tool config if present
+      if llm_request.config and llm_request.config.tool_config:
+        cache_config.tool_config = llm_request.config.tool_config
 
-    # Return complete cache metadata with precise timing
-    return CacheMetadata(
-        cache_name=cached_content.name,
-        expire_time=created_at + llm_request.cache_config.ttl_seconds,
-        fingerprint=self._generate_cache_fingerprint(
-            llm_request, cache_contents_count
-        ),
-        invocations_used=1,
-        contents_count=cache_contents_count,
-        created_at=created_at,
-    )
+      span.set_attribute("cache_contents_count", cache_contents_count)
+      span.set_attribute("model", llm_request.model)
+      span.set_attribute("ttl_seconds", llm_request.cache_config.ttl_seconds)
+
+      logger.debug(
+          "Creating cache with model %s and config: %s",
+          llm_request.model,
+          cache_config,
+      )
+      cached_content = await self.genai_client.aio.caches.create(
+          model=llm_request.model,
+          config=cache_config,
+      )
+      # Set precise creation timestamp right after cache creation
+      created_at = time.time()
+      logger.info("Cache created successfully: %s", cached_content.name)
+
+      span.set_attribute("cache_name", cached_content.name)
+
+      # Return complete cache metadata with precise timing
+      return CacheMetadata(
+          cache_name=cached_content.name,
+          expire_time=created_at + llm_request.cache_config.ttl_seconds,
+          fingerprint=self._generate_cache_fingerprint(
+              llm_request, cache_contents_count
+          ),
+          invocations_used=1,
+          contents_count=cache_contents_count,
+          created_at=created_at,
+      )
 
   async def cleanup_cache(self, cache_name: str) -> None:
     """Clean up cache by deleting it.
```

src/google/adk/models/google_llm.py

Lines changed: 10 additions & 2 deletions
```diff
@@ -115,10 +115,18 @@ async def generate_content_async(
     cache_metadata = None
     cache_manager = None
     if llm_request.cache_config:
+      from ..telemetry.tracing import tracer
       from .gemini_context_cache_manager import GeminiContextCacheManager
 
-      cache_manager = GeminiContextCacheManager(self.api_client)
-      cache_metadata = await cache_manager.handle_context_caching(llm_request)
+      with tracer.start_as_current_span('handle_context_caching') as span:
+        cache_manager = GeminiContextCacheManager(self.api_client)
+        cache_metadata = await cache_manager.handle_context_caching(llm_request)
+        if cache_metadata:
+          if cache_metadata.cache_name:
+            span.set_attribute('cache_action', 'active_cache')
+            span.set_attribute('cache_name', cache_metadata.cache_name)
+          else:
+            span.set_attribute('cache_action', 'fingerprint_only')
 
     logger.info(
         'Sending out request, model: %s, backend: %s, stream: %s',
```
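On the caller side, the span distinguishes two outcomes: `handle_context_caching` either returns metadata naming an active cache (`cache_action='active_cache'`) or metadata carrying only a fingerprint (`cache_action='fingerprint_only'`). Below is a sketch of how one might verify those attributes land on the finished span, assuming the OpenTelemetry SDK's in-memory exporter; the surrounding setup is illustrative, not ADK code:

```python
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
    InMemorySpanExporter,
)

# Collect finished spans in memory so attributes can be inspected directly.
exporter = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(exporter))
tracer = provider.get_tracer(__name__)

# Simulate the branch taken when no active cache exists yet, so only a
# fingerprint was computed (cache_metadata.cache_name is falsy).
with tracer.start_as_current_span('handle_context_caching') as span:
  cache_name = None  # pretend handle_context_caching found no usable cache
  if cache_name:
    span.set_attribute('cache_action', 'active_cache')
    span.set_attribute('cache_name', cache_name)
  else:
    span.set_attribute('cache_action', 'fingerprint_only')

(finished,) = exporter.get_finished_spans()
assert finished.attributes['cache_action'] == 'fingerprint_only'
print(dict(finished.attributes))
```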
