Commit d51a383

Author: matdev83 (committed)
feat: enhance hybrid backend repeat messages and edit precision integration
- Add HYBRID_BACKEND_REPEAT_MESSAGES environment variable and config option
- Implement temporary reasoning probability override from edit precision middleware
- Add hybrid reasoning disable mechanism after edit failures
- Update configuration examples and sample environment files
- Add comprehensive test coverage for hybrid reasoning override functionality
- Improve edit precision middleware with hybrid reasoning integration
- Add wire capture sample for debugging and analysis
- Update test suite state and changelog formatting
1 parent aadcd38 commit d51a383

File tree

11 files changed: +1785 −1512 lines changed


CHANGELOG.md

Lines changed: 2 additions & 11 deletions
@@ -2,8 +2,6 @@
 
 ## [2025-11-04]
 
-### Added
-
 - **Hybrid Backend Repeat Messages Feature**: New configuration option to repeat reasoning output as an artificial message in the session
 - **New Configuration Option**: `--hybrid-backend-repeat-messages` CLI flag and `HYBRID_BACKEND_REPEAT_MESSAGES` environment variable to enable the feature
 - **Artificial Message Injection**: When enabled, reasoning output is added as an artificial assistant message in the conversation history
@@ -23,7 +21,6 @@
 
 ## [2025-10-31]
 
-
 - **XML Tool Call Format Support**: Added support for XML tool call format in ToolCallRepairService
 - XML pattern detection and parsing for Kilo MCP tools
 - Support for both direct XML tool format and use_mcp_tool wrapper format
@@ -44,8 +41,6 @@
 
 ## [2025-10-23]
 
-### Added
-
 - **Intelligent Session Management**: Autonomous session continuity detection via message history fingerprinting
 - **Context Loss Prevention**: Eliminates session loss for stateless clients (e.g., Kilo Code, Cursor) that don't send session IDs
 - **Message History Fingerprinting**: Computes stable hashes from conversation sequences to detect continuity
@@ -65,8 +60,6 @@
 
 ## [2025-01-21]
 
-### Added
-
 - **LLM Assessment System**: Intelligent conversation quality monitoring inspired by Google's gemini-cli
 - Automatically detects unproductive patterns like repetitive tool calls and cognitive loops
 - Event-driven assessment triggers after configurable turn thresholds (default: 30 turns)
@@ -77,7 +70,7 @@
 - Graceful degradation - assessment failures never break main conversation flow
 - Complete documentation in README.md with configuration examples and use cases
 
-# 2025-10-17 - Gemini OAuth Backend Refactoring
+## 2025-10-17 - Gemini OAuth Backend Refactoring
 
 - **Refactor**: Split `gemini-oauth-personal` backend into two specialized backends for different use cases
 - **New Backend**: `gemini-oauth-free` for free-tier Gemini API usage with appropriate quotas and limits
@@ -86,7 +79,7 @@
 - **Migration**: Existing configurations automatically redirect to appropriate backend based on authentication type
 - **Testing**: Comprehensive test suites created for both new backends with full coverage of OAuth flows and API interactions
 
-# 2025-10-16 - Command Pipeline Policy & Regression Coverage
+## 2025-10-16 - Command Pipeline Policy & Regression Coverage
 
 - **Dependency Injection**: Command services now require explicit `ICommandPolicyService`
   and `ICommandStateService` instances. `CommandStage` wires the policy/state helpers,
@@ -288,8 +281,6 @@
 
 - **Maintenance**: Various code quality improvements including import organization, unused import removal, and code formatting consistency
 
-# Changelog
-
 ## 2025-10-01 - Refactor: Translation Service and Gemini Request Counting
 
 - **Refactor**: Centralized all request/response translation logic into a new `TranslationService` (`src/core/services/translation_service.py`). This improves modularity, simplifies maintenance, and makes it easier to add new API formats.

config/config.example.yaml

Lines changed: 6 additions & 5 deletions
@@ -94,6 +94,7 @@ logging:
 
 backends:
   default_backend: "openai"
+  # hybrid_backend_repeat_messages: false # Set to true to repeat reasoning output as an artificial message in the session
 
   openai:
     # API key set via OPENROUTER_API_KEY environment variable
@@ -114,7 +115,7 @@ backends:
     timeout: 150
 
   gemini:
-    # API key set via GEMINI_API_KEY environment variable
+    # GEMINI_API_KEY environment variable
     timeout: 120
 
   qwen_oauth:
@@ -140,10 +141,10 @@ model_defaults:
 # Failover routes
 failover_routes:
   default:
-   policy: "ordered"
-   elements:
-   - "openai:gpt-4"
-   - "openrouter:anthropic/claude-3-opus-20240229"
+    policy: "ordered"
+    elements:
+      - "openai:gpt-4"
+      - "openrouter:anthropic/claude-3-opus-20240229"
 
 # Model name rewrite rules (optional)
 # These rules allow you to dynamically rewrite model names before they are processed
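The failover route elements above follow a `backend:model` naming scheme. A minimal sketch of parsing one such element (the helper name `parse_route_element` is hypothetical, not part of the repository):

```python
def parse_route_element(element: str) -> tuple[str, str]:
    """Split a failover route element like "openai:gpt-4" into (backend, model).

    Model names may contain slashes (e.g. "anthropic/claude-3-opus-20240229"),
    so split only on the first colon.
    """
    backend, _, model = element.partition(":")
    if not backend or not model:
        raise ValueError(f"expected 'backend:model', got {element!r}")
    return backend, model

# Elements taken from the failover route in the config example above:
for element in ["openai:gpt-4", "openrouter:anthropic/claude-3-opus-20240229"]:
    print(parse_route_element(element))
```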

config/sample.env

Lines changed: 2 additions & 0 deletions
@@ -49,6 +49,8 @@ TOOL_CALL_REPAIR_BUFFER_CAP_BYTES=65536
 FORCE_REPROCESS_TOOL_CALLS=false
 # Log when tool calls are skipped (useful for visibility during development, default: false)
 # When enabled, logs will show which messages are being skipped to help understand the optimization
+# Enable hybrid backend to repeat reasoning messages as artificial messages in the session (default: false)
+HYBRID_BACKEND_REPEAT_MESSAGES=false
 LOG_SKIPPED_TOOL_CALLS=false
 
 # Loop Detection Settings
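The new flag is a boolean read from the environment. A minimal sketch of the kind of parsing involved (the helper name `env_flag` and the accepted truthy spellings are assumptions; the project's actual parser may differ):

```python
import os

def env_flag(name: str, default: bool = False) -> bool:
    """Interpret common truthy spellings of a boolean environment variable."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}

os.environ["HYBRID_BACKEND_REPEAT_MESSAGES"] = "false"
print(env_flag("HYBRID_BACKEND_REPEAT_MESSAGES"))  # False

os.environ["HYBRID_BACKEND_REPEAT_MESSAGES"] = "true"
print(env_flag("HYBRID_BACKEND_REPEAT_MESSAGES"))  # True
```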

data/test_suite_state.json

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 {
-  "test_count": 5057,
+  "test_count": 5058,
   "last_updated": "1762168167.0802596"
 }

sample_wire_capture.jsonl

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+{"timestamp_iso": "2025-01-10T15:58:41.039145+00:00", "timestamp_unix": 1736524721.039145, "direction": "inbound_response", "source": "127.0.0.1(Cline/1.0)", "destination": "qwen-oauth", "session_id": "session-abc123", "backend": "qwen-oauth", "model": "qwen3-coder-plus", "content_type": "json", "content_length": 1247, "payload": {"choices": [{"message": {"content": "Failed to edit, could not find the string to replace in the file. The SEARCH block content doesn't match exactly."}}], "usage": {"total_tokens": 150}}, "metadata": {"client_host": "127.0.0.1", "user_agent": "Cline/1.0", "request_id": "req_abc123"}}
+{"timestamp_iso": "2025-01-10T15:58:42.156234+00:00", "timestamp_unix": 1736524722.156234, "direction": "outbound_request", "source": "127.0.0.1(Cline/1.0)", "destination": "openai", "session_id": "session-abc123", "backend": "openai", "model": "gpt-4o", "content_type": "json", "content_length": 892, "payload": {"messages": [{"role": "user", "content": "I need to make changes to this file but the previous edit failed"}], "temperature": 0.1, "top_p": 0.3}, "metadata": {"client_host": "127.0.0.1", "user_agent": "Cline/1.0", "request_id": "req_abc124", "_edit_precision_mode": true, "_edit_precision_meta": {"original_temperature": 0.7, "original_top_p": 0.8, "applied_temperature": 0.1, "applied_top_p": 0.3}}}
+{"timestamp_iso": "2025-01-10T15:59:10.201456+00:00", "timestamp_unix": 1736524750.201456, "direction": "inbound_response", "source": "127.0.0.1(Cline/1.0)", "destination": "openai", "session_id": "session-abc123", "backend": "openai", "model": "gpt-4o", "content_type": "json", "content_length": 956, "payload": {"choices": [{"message": {"content": "Error: [patch_file] Error - old_string not found in content", "tool_calls": [{"function": {"name": "patch_file", "arguments": "{\"path\": \"example.py\", \"old_string\": \"def old_func():\", \"new_string\": \"def new_func():\"}", "status": "error"}}]}}], "usage": {"total_tokens": 200}}, "metadata": {"client_host": "127.0.0.1", "user_agent": "Cline/1.0", "request_id": "req_abc125"}}
+{"timestamp_iso": "2025-01-10T15:59:11.892789+00:00", "timestamp_unix": 1736524751.892789, "direction": "outbound_request", "source": "127.0.0.1(Cline/1.0)", "destination": "anthropic", "session_id": "session-abc123", "backend": "anthropic", "model": "claude-3-5-sonnet", "content_type": "json", "content_length": 1023, "payload": {"messages": [{"role": "user", "content": "I need to fix the previous edit that failed"}], "temperature": 0.1, "top_p": 0.3}, "metadata": {"client_host": "127.0.0.1", "user_agent": "Cline/1.0", "request_id": "req_abc126", "_edit_precision_mode": true, "_edit_precision_meta": {"original_temperature": 0.5, "original_top_p": 0.7, "applied_temperature": 0.1, "applied_top_p": 0.3}}}

src/connectors/hybrid.py

Lines changed: 26 additions & 5 deletions
@@ -1833,12 +1833,33 @@ async def chat_completions(
         has_reasoning_content = False
         reasoning_time = 0.0
 
-        # Decide whether to use the reasoning model
-        use_reasoning = (
-            random.random() < self.config.backends.reasoning_injection_probability
-        )
+        # Check for temporary reasoning injection probability override from edit precision middleware
+        temp_reasoning_probability = None
+        if isinstance(request_data, dict):
+            extra_body = request_data.get("extra_body", {})
+        else:
+            extra_body = getattr(request_data, "extra_body", {})
+        if extra_body is None:
+            extra_body = {}
+
+        # Check if edit precision middleware has set a temporary override
+        temp_prob_override = extra_body.get("_temp_hybrid_reasoning_probability")
+        if temp_prob_override is not None:
+            temp_reasoning_probability = float(temp_prob_override)
+            # Log that we're using a temporary override
+            logger.info(
+                f"Using temporary reasoning injection probability override: {temp_reasoning_probability} for session",
+                extra={"session_id": session_id},
+            )
+        else:
+            temp_reasoning_probability = (
+                self.config.backends.reasoning_injection_probability
+            )
+
+        # Decide whether to use the reasoning model with the (potentially overridden) probability
+        use_reasoning = random.random() < temp_reasoning_probability
         logger.info(
-            f"Reasoning model injection decision: {'USE' if use_reasoning else 'SKIP'}"
+            f"Reasoning model injection decision: {'USE' if use_reasoning else 'SKIP'}, probability={temp_reasoning_probability}"
        )
 
         if use_reasoning:
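The override lookup added by this diff can be exercised in isolation. A minimal sketch of the same logic, assuming the `_temp_hybrid_reasoning_probability` key from the diff; the standalone helper name `resolve_reasoning_probability` is not part of the repository:

```python
from typing import Any

def resolve_reasoning_probability(request_data: Any, default_probability: float) -> float:
    """Mirror the diff's lookup: prefer a temporary override planted in
    extra_body by the edit precision middleware, else fall back to the
    configured reasoning injection probability."""
    if isinstance(request_data, dict):
        extra_body = request_data.get("extra_body", {})
    else:
        extra_body = getattr(request_data, "extra_body", {})
    if extra_body is None:
        extra_body = {}

    override = extra_body.get("_temp_hybrid_reasoning_probability")
    if override is not None:
        return float(override)
    return default_probability

# After an edit failure the middleware can set the override to 0.0,
# disabling reasoning injection for the retried request:
request = {"extra_body": {"_temp_hybrid_reasoning_probability": 0.0}}
print(resolve_reasoning_probability(request, 0.25))  # 0.0
print(resolve_reasoning_probability({}, 0.25))       # 0.25 (configured default)
```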
