🤖 fix: clear todos on stream end with smart reconnection handling (#498)

ammar-agent · web-flow · commit cc64299d5c83 · 2025-10-31T18:50:02.000Z
## Problem

**TODOs were not being cleared when streams ended**, causing stale todos
to persist in the UI until the next user message. Additionally, on page
reload, TODOs were incorrectly reconstructed from history even for
completed streams.

The challenge: We need **different behavior for two reload scenarios**:
- **Reconnection during active stream**: Should reconstruct TODOs (work
in progress)
- **Reload after completed stream**: Should NOT reconstruct TODOs (clean
slate)

## Solution

### 1. Clear TODOs on stream end
Modified `cleanupStreamState()` to clear `currentTodos` when streams
complete (end/abort/error). TODOs are now truly stream-scoped.

### 2. Smart reconstruction on reload
**Key insight**: Check buffered events for `stream-start` to detect
active streams.

- `loadHistoricalMessages()` now accepts `hasActiveStream` parameter
- `WorkspaceStore` checks buffered events before loading history
- **Active stream** (hasActiveStream=true) → Reconstruct TODOs ✅
- **Completed stream** (hasActiveStream=false) → Don't reconstruct TODOs
✅
- **agentStatus** always reconstructed (persists across sessions) ✅

### 3. Improved separation of concerns
Centralized tool persistence logic in `processToolResult`:
- Added `context` parameter: `"streaming" | "historical"`
- `loadHistoricalMessages` no longer knows about specific tool behaviors
- Each tool declares its own persistence policy in one place

## Implementation

**WorkspaceStore checks for active streams:**
```typescript
const pendingEvents = this.pendingStreamEvents.get(workspaceId) ?? [];
const hasActiveStream = pendingEvents.some(
  (event) => "type" in event && event.type === "stream-start"
);
aggregator.loadHistoricalMessages(historicalMsgs, hasActiveStream);
```

**StreamingMessageAggregator handles context:**
```typescript
loadHistoricalMessages(messages: CmuxMessage[], hasActiveStream = false) {
  const context = hasActiveStream ? "streaming" : "historical";
  // Process tool results with context
  this.processToolResult(toolName, input, output, context);
}
```

**processToolResult decides based on tool + context:**
```typescript
private processToolResult(toolName, input, output, context: "streaming" | "historical") {
  // TODOs: stream-scoped (only during streaming)
  if (toolName === "todo_write" && context === "streaming") {
    this.currentTodos = args.todos;
  }
  
  // agentStatus: persistent (always reconstruct)
  if (toolName === "status_set") {
    this.agentStatus = { emoji, message, url };
  }
}
```

## Testing

Comprehensive test coverage for the full todo lifecycle:

✅ Clear todos on stream end  
✅ Clear todos on stream abort  
✅ Reconstruct todos when `hasActiveStream=true` (reconnection)  
✅ Don't reconstruct todos when `hasActiveStream=false` (completed)  
✅ Always reconstruct agentStatus (persists across sessions)  
✅ Clear todos on new user message

All 146 message-related tests pass.

## Behavior Matrix

| Scenario | TODOs | agentStatus |
|----------|-------|-------------|
| During streaming | ✅ Visible | ✅ Visible |
| Stream ends | ❌ Cleared | ✅ Persists |
| Reload (active stream) | ✅ Reconstructed | ✅ Reconstructed |
| Reload (completed) | ❌ Not reconstructed | ✅ Reconstructed |
| New user message | ❌ Cleared | ❌ Cleared |

## Key Insight

The **reconnection scenario** is the critical edge case: when a user
reloads during an active stream, historical messages contain completed
tool calls from the *current* stream, and buffered events contain
`stream-start`. In this case, we *should* reconstruct TODOs to show work
in progress.

This is fundamentally different from reloading after stream completion,
where TODOs should remain cleared.

---

_Generated with `cmux`_
diff --git a/src/components/PinnedTodoList.tsx b/src/components/PinnedTodoList.tsx
@@ -10,13 +10,14 @@ interface PinnedTodoListProps {
 
 /**
  * Pinned TODO list displayed at bottom of chat (before StreamingBarrier).
- * Shows current TODOs from active stream only.
+ * Shows current TODOs from active stream only - automatically cleared when stream ends.
  * Reuses TodoList component for consistent styling.
  *
  * Relies on natural reference stability from MapStore + Aggregator architecture:
  * - Aggregator.getCurrentTodos() returns direct reference (not a copy)
  * - Reference only changes when todos are actually modified
  * - MapStore caches WorkspaceState per version, avoiding unnecessary recomputation
+ * - Todos are cleared by StreamingMessageAggregator when stream completes
  */
 export const PinnedTodoList: React.FC<PinnedTodoListProps> = ({ workspaceId }) => {
   const [expanded, setExpanded] = usePersistedState("pinnedTodoExpanded", true);
@@ -27,17 +28,8 @@ export const PinnedTodoList: React.FC<PinnedTodoListProps> = ({ workspaceId }) =
     () => workspaceStore.getWorkspaceState(workspaceId).todos
   );
 
-  // Get streaming state
-  const canInterrupt = useSyncExternalStore(
-    (callback) => workspaceStore.subscribeKey(workspaceId, callback),
-    () => workspaceStore.getWorkspaceState(workspaceId).canInterrupt
-  );
-
-  // When idle (not streaming), only show completed todos for clean summary
-  // When streaming, show all todos so user can see active work
-  const displayTodos = canInterrupt ? todos : todos.filter((todo) => todo.status === "completed");
-
-  if (displayTodos.length === 0) {
+  // Todos are cleared when stream ends, so if there are todos they're from an active stream
+  if (todos.length === 0) {
     return null;
   }
 
@@ -57,7 +49,7 @@ export const PinnedTodoList: React.FC<PinnedTodoListProps> = ({ workspaceId }) =
         </span>
         TODO{expanded ? ":" : ""}
       </div>
-      {expanded && <TodoList todos={displayTodos} />}
+      {expanded && <TodoList todos={todos} />}
     </div>
   );
 };
diff --git a/src/stores/WorkspaceStore.ts b/src/stores/WorkspaceStore.ts
@@ -910,17 +910,22 @@ export class WorkspaceStore {
     const historicalMsgs = this.historicalMessages.get(workspaceId) ?? [];
 
     if (isCaughtUpMessage(data)) {
+      // Check if there's an active stream in buffered events (reconnection scenario)
+      const pendingEvents = this.pendingStreamEvents.get(workspaceId) ?? [];
+      const hasActiveStream = pendingEvents.some(
+        (event) => "type" in event && event.type === "stream-start"
+      );
+
       // Load historical messages first
       if (historicalMsgs.length > 0) {
-        aggregator.loadHistoricalMessages(historicalMsgs);
+        aggregator.loadHistoricalMessages(historicalMsgs, hasActiveStream);
         this.historicalMessages.set(workspaceId, []);
       }
 
       // Mark that we're replaying buffered history (prevents O(N) scheduling)
       this.replayingHistory.add(workspaceId);
 
       // Process buffered stream events now that history is loaded
-      const pendingEvents = this.pendingStreamEvents.get(workspaceId) ?? [];
       for (const event of pendingEvents) {
         this.processStreamEvent(workspaceId, aggregator, event);
       }
diff --git a/src/utils/messages/StreamingMessageAggregator.test.ts b/src/utils/messages/StreamingMessageAggregator.test.ts
@@ -137,4 +137,246 @@ describe("StreamingMessageAggregator", () => {
       expect(messages1).toBe(messages2);
     });
   });
+
+  describe("todo lifecycle", () => {
+    test("should clear todos when stream ends", () => {
+      const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
+
+      // Start a stream
+      aggregator.handleStreamStart({
+        type: "stream-start",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        historySequence: 1,
+        model: "claude-3-5-sonnet-20241022",
+      });
+
+      // Simulate todo_write tool call
+      aggregator.handleToolCallStart({
+        messageId: "msg1",
+        toolCallId: "tool1",
+        toolName: "todo_write",
+        args: {
+          todos: [
+            { content: "Do task 1", status: "in_progress" },
+            { content: "Do task 2", status: "pending" },
+          ],
+        },
+        tokens: 10,
+        timestamp: Date.now(),
+        type: "tool-call-start",
+        workspaceId: "test-workspace",
+      });
+
+      aggregator.handleToolCallEnd({
+        type: "tool-call-end",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        toolCallId: "tool1",
+        toolName: "todo_write",
+        result: { success: true },
+      });
+
+      // Verify todos are set
+      expect(aggregator.getCurrentTodos()).toHaveLength(2);
+      expect(aggregator.getCurrentTodos()[0].content).toBe("Do task 1");
+
+      // End the stream
+      aggregator.handleStreamEnd({
+        type: "stream-end",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        metadata: {
+          historySequence: 1,
+          timestamp: Date.now(),
+          model: "claude-3-5-sonnet-20241022",
+        },
+        parts: [],
+      });
+
+      // Todos should be cleared
+      expect(aggregator.getCurrentTodos()).toHaveLength(0);
+    });
+
+    test("should clear todos when stream aborts", () => {
+      const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
+
+      aggregator.handleStreamStart({
+        type: "stream-start",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        historySequence: 1,
+        model: "claude-3-5-sonnet-20241022",
+      });
+
+      // Simulate todo_write
+      aggregator.handleToolCallStart({
+        messageId: "msg1",
+        toolCallId: "tool1",
+        toolName: "todo_write",
+        args: {
+          todos: [{ content: "Task", status: "in_progress" }],
+        },
+        tokens: 10,
+        timestamp: Date.now(),
+        type: "tool-call-start",
+        workspaceId: "test-workspace",
+      });
+
+      aggregator.handleToolCallEnd({
+        type: "tool-call-end",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        toolCallId: "tool1",
+        toolName: "todo_write",
+        result: { success: true },
+      });
+
+      expect(aggregator.getCurrentTodos()).toHaveLength(1);
+
+      // Abort the stream
+      aggregator.handleStreamAbort({
+        type: "stream-abort",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        metadata: {},
+      });
+
+      // Todos should be cleared
+      expect(aggregator.getCurrentTodos()).toHaveLength(0);
+    });
+
+    test("should reconstruct todos on reload ONLY when reconnecting to active stream", () => {
+      const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
+
+      const historicalMessage = {
+        id: "msg1",
+        role: "assistant" as const,
+        parts: [
+          {
+            type: "dynamic-tool" as const,
+            toolCallId: "tool1",
+            toolName: "todo_write",
+            state: "output-available" as const,
+            input: {
+              todos: [
+                { content: "Historical task 1", status: "completed" },
+                { content: "Historical task 2", status: "completed" },
+              ],
+            },
+            output: { success: true },
+          },
+        ],
+        metadata: {
+          historySequence: 1,
+          timestamp: Date.now(),
+          model: "claude-3-5-sonnet-20241022",
+        },
+      };
+
+      // Scenario 1: Reload with active stream (hasActiveStream = true)
+      aggregator.loadHistoricalMessages([historicalMessage], true);
+      expect(aggregator.getCurrentTodos()).toHaveLength(2);
+      expect(aggregator.getCurrentTodos()[0].content).toBe("Historical task 1");
+
+      // Reset for next scenario
+      const aggregator2 = new StreamingMessageAggregator(TEST_CREATED_AT);
+
+      // Scenario 2: Reload without active stream (hasActiveStream = false)
+      aggregator2.loadHistoricalMessages([historicalMessage], false);
+      expect(aggregator2.getCurrentTodos()).toHaveLength(0);
+    });
+
+    test("should reconstruct agentStatus but NOT todos when no active stream", () => {
+      const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
+
+      const historicalMessage = {
+        id: "msg1",
+        role: "assistant" as const,
+        parts: [
+          {
+            type: "dynamic-tool" as const,
+            toolCallId: "tool1",
+            toolName: "todo_write",
+            state: "output-available" as const,
+            input: {
+              todos: [{ content: "Task 1", status: "completed" }],
+            },
+            output: { success: true },
+          },
+          {
+            type: "dynamic-tool" as const,
+            toolCallId: "tool2",
+            toolName: "status_set",
+            state: "output-available" as const,
+            input: { emoji: "🔧", message: "Working on it" },
+            output: { success: true, emoji: "🔧", message: "Working on it" },
+          },
+        ],
+        metadata: {
+          historySequence: 1,
+          timestamp: Date.now(),
+          model: "claude-3-5-sonnet-20241022",
+        },
+      };
+
+      // Load without active stream
+      aggregator.loadHistoricalMessages([historicalMessage], false);
+
+      // agentStatus should be reconstructed (persists across sessions)
+      expect(aggregator.getAgentStatus()).toEqual({ emoji: "🔧", message: "Working on it" });
+
+      // TODOs should NOT be reconstructed (stream-scoped)
+      expect(aggregator.getCurrentTodos()).toHaveLength(0);
+    });
+
+    test("should clear todos when new user message arrives during active stream", () => {
+      const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
+
+      // Simulate an active stream with todos
+      aggregator.handleStreamStart({
+        type: "stream-start",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        historySequence: 1,
+        model: "claude-3-5-sonnet-20241022",
+      });
+
+      aggregator.handleToolCallStart({
+        messageId: "msg1",
+        toolCallId: "tool1",
+        toolName: "todo_write",
+        args: {
+          todos: [{ content: "Task", status: "completed" }],
+        },
+        tokens: 10,
+        timestamp: Date.now(),
+        type: "tool-call-start",
+        workspaceId: "test-workspace",
+      });
+
+      aggregator.handleToolCallEnd({
+        type: "tool-call-end",
+        workspaceId: "test-workspace",
+        messageId: "msg1",
+        toolCallId: "tool1",
+        toolName: "todo_write",
+        result: { success: true },
+      });
+
+      // TODOs should be set
+      expect(aggregator.getCurrentTodos()).toHaveLength(1);
+
+      // Add new user message (simulating user sending a new message)
+      aggregator.handleMessage({
+        id: "msg2",
+        role: "user",
+        parts: [{ type: "text", text: "Hello" }],
+        metadata: { historySequence: 2, timestamp: Date.now() },
+      });
+
+      // Todos should be cleared when new user message arrives
+      expect(aggregator.getCurrentTodos()).toHaveLength(0);
+    });
+  });
 });
diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts