Fix Laminar span stack warning in LocalConversation (#1039)

jpshackelford · openhands-agent · web-flow · commit e5a7efea44cf · 2025-11-06T02:28:58.000+08:00
Co-authored-by: openhands &lt;openhands@all-hands.dev&gt;
diff --git a/openhands-sdk/openhands/sdk/conversation/base.py b/openhands-sdk/openhands/sdk/conversation/base.py
@@ -9,6 +9,11 @@
 from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
 from openhands.sdk.llm.llm import LLM
 from openhands.sdk.llm.message import Message
+from openhands.sdk.observability.laminar import (
+    end_active_span,
+    should_enable_observability,
+    start_active_span,
+)
 from openhands.sdk.security.confirmation_policy import (
     ConfirmationPolicyBase,
     NeverConfirm,
@@ -76,6 +81,25 @@ class BaseConversation(ABC):
     exchange, execution control, and state management.
     """
 
+    def __init__(self) -> None:
+        """Initialize the base conversation with span tracking."""
+        self._span_ended = False
+
+    def _start_observability_span(self, session_id: str) -> None:
+        """Start an observability span if observability is enabled.
+
+        Args:
+            session_id: The session ID to associate with the span
+        """
+        if should_enable_observability():
+            start_active_span("conversation", session_id=session_id)
+
+    def _end_observability_span(self) -> None:
+        """End the observability span if it hasn't been ended already."""
+        if not self._span_ended and should_enable_observability():
+            end_active_span()
+            self._span_ended = True
+
     @property
     @abstractmethod
     def id(self) -> ConversationID: ...
diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
@@ -26,12 +26,7 @@
 from openhands.sdk.llm import LLM, Message, TextContent
 from openhands.sdk.llm.llm_registry import LLMRegistry
 from openhands.sdk.logger import get_logger
-from openhands.sdk.observability.laminar import (
-    end_active_span,
-    observe,
-    should_enable_observability,
-    start_active_span,
-)
+from openhands.sdk.observability.laminar import observe
 from openhands.sdk.security.confirmation_policy import (
     ConfirmationPolicyBase,
 )
@@ -144,10 +139,10 @@ def _default_callback(e):
             secret_values: dict[str, SecretValue] = {k: v for k, v in secrets.items()}
             self.update_secrets(secret_values)
 
+        super().__init__()  # Initialize base class with span tracking
         self._cleanup_initiated = False
         atexit.register(self.close)
-        if should_enable_observability():
-            start_active_span("conversation", session_id=str(desired_id))
+        self._start_observability_span(str(desired_id))
 
     @property
     def id(self) -> ConversationID:
@@ -306,7 +301,7 @@ def run(self) -> None:
             # Re-raise with conversation id for better UX; include original traceback
             raise ConversationRunError(self._state.id, e) from e
         finally:
-            end_active_span()
+            self._end_observability_span()
 
     def set_confirmation_policy(self, policy: ConfirmationPolicyBase) -> None:
         """Set the confirmation policy and store it in conversation state."""
@@ -389,7 +384,7 @@ def close(self) -> None:
             return
         self._cleanup_initiated = True
         logger.debug("Closing conversation and cleaning up tool executors")
-        end_active_span()
+        self._end_observability_span()
         for tool in self.agent.tools_map.values():
             try:
                 executable_tool = tool.as_executable()
diff --git a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py
@@ -28,12 +28,7 @@
 )
 from openhands.sdk.llm import LLM, Message, TextContent
 from openhands.sdk.logger import get_logger
-from openhands.sdk.observability.laminar import (
-    end_active_span,
-    observe,
-    should_enable_observability,
-    start_active_span,
-)
+from openhands.sdk.observability.laminar import observe
 from openhands.sdk.security.confirmation_policy import (
     ConfirmationPolicyBase,
 )
@@ -438,6 +433,7 @@ def __init__(
                                   which agent/conversation is speaking.
             secrets: Optional secrets to initialize the conversation with
         """
+        super().__init__()  # Initialize base class with span tracking
         self.agent = agent
         self._callbacks = callbacks or []
         self.max_iteration_per_run = max_iteration_per_run
@@ -513,8 +509,7 @@ def __init__(
             secret_values: dict[str, SecretValue] = {k: v for k, v in secrets.items()}
             self.update_secrets(secret_values)
 
-        if should_enable_observability():
-            start_active_span("conversation", session_id=str(self._id))
+        self._start_observability_span(str(self._id))
 
     @property
     def id(self) -> ConversationID:
@@ -653,7 +648,7 @@ def close(self) -> None:
         except Exception:
             pass
 
-        end_active_span()
+        self._end_observability_span()
 
         try:
             self._client.close()
diff --git a/tests/sdk/conversation/local/test_span_double_ending.py b/tests/sdk/conversation/local/test_span_double_ending.py
@@ -0,0 +1,146 @@
+"""Test for the span double-ending issue in LocalConversation."""
+
+import logging
+import tempfile
+from unittest.mock import patch
+
+import pytest
+from pydantic import SecretStr
+
+from openhands.sdk.agent import Agent
+from openhands.sdk.conversation.impl.local_conversation import LocalConversation
+from openhands.sdk.llm import LLM
+
+
+def create_test_agent() -> Agent:
+    """Create a test agent."""
+    llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
+    return Agent(llm=llm, tools=[])
+
+
+def test_no_double_span_ending_warning(caplog):
+    """Test that LocalConversation doesn't produce double span ending warnings."""
+
+    # Create test agent
+    agent = create_test_agent()
+
+    # Create a temporary workspace
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create conversation
+        conversation = LocalConversation(
+            agent=agent,
+            workspace=temp_dir,
+            visualize=False,  # Disable visualization to simplify test
+        )
+
+        # Capture logs at WARNING level
+        with caplog.at_level(logging.WARNING):
+            # Mock the agent.step to raise an exception to trigger the finally block
+            with patch(
+                "openhands.sdk.agent.agent.Agent.step",
+                side_effect=Exception("Test exception"),
+            ):
+                # Try to run the conversation (will fail due to mocked exception)
+                with pytest.raises(Exception):
+                    conversation.run()
+
+            # Close the conversation (this would normally be called by __del__)
+            conversation.close()
+
+        # Check that no warning about empty span stack was logged
+        warning_messages = [
+            record.message for record in caplog.records if record.levelname == "WARNING"
+        ]
+        span_warnings = [
+            msg
+            for msg in warning_messages
+            if "Attempted to end active span, but stack is empty" in msg
+        ]
+
+        # This test should fail initially (showing the bug exists)
+        # After the fix, there should be no span warnings
+        assert len(span_warnings) == 0, f"Found span warnings: {span_warnings}"
+
+
+def test_span_ending_with_successful_run(caplog):
+    """Test span ending behavior with a successful run (no exceptions)."""
+
+    # Create test agent
+    agent = create_test_agent()
+
+    # Create a temporary workspace
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create conversation
+        conversation = LocalConversation(
+            agent=agent, workspace=temp_dir, visualize=False
+        )
+
+        # Mock the agent.step to finish immediately (no iterations)
+        def finish_immediately(*args, **kwargs):
+            conversation._state.execution_status = (
+                conversation._state.execution_status.__class__.FINISHED
+            )
+
+        # Capture logs at WARNING level
+        with caplog.at_level(logging.WARNING):
+            with patch(
+                "openhands.sdk.agent.agent.Agent.step", side_effect=finish_immediately
+            ):
+                # Run the conversation successfully
+                conversation.run()
+
+            # Close the conversation
+            conversation.close()
+
+        # Check that no warning about empty span stack was logged
+        warning_messages = [
+            record.message for record in caplog.records if record.levelname == "WARNING"
+        ]
+        span_warnings = [
+            msg
+            for msg in warning_messages
+            if "Attempted to end active span, but stack is empty" in msg
+        ]
+
+        assert len(span_warnings) == 0, f"Found span warnings: {span_warnings}"
+
+
+def test_no_span_operations_when_observability_disabled(caplog):
+    """Test that no span operations occur when observability is disabled."""
+
+    # Create test agent
+    agent = create_test_agent()
+
+    # Create a temporary workspace
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create conversation
+        conversation = LocalConversation(
+            agent=agent, workspace=temp_dir, visualize=False
+        )
+
+        # Mock the agent.step to finish immediately
+        def finish_immediately(*args, **kwargs):
+            conversation._state.execution_status = (
+                conversation._state.execution_status.__class__.FINISHED
+            )
+
+        # Capture logs at WARNING level
+        with caplog.at_level(logging.WARNING):
+            # Run and close the conversation
+            with patch(
+                "openhands.sdk.agent.agent.Agent.step", side_effect=finish_immediately
+            ):
+                conversation.run()
+            conversation.close()
+
+        # Check that no warning about empty span stack was logged
+        warning_messages = [
+            record.message for record in caplog.records if record.levelname == "WARNING"
+        ]
+        span_warnings = [
+            msg
+            for msg in warning_messages
+            if "Attempted to end active span, but stack is empty" in msg
+        ]
+
+        assert len(span_warnings) == 0, f"Found span warnings: {span_warnings}"
diff --git a/tests/sdk/conversation/test_base_span_management.py b/tests/sdk/conversation/test_base_span_management.py