Skip to content

Commit 28017a9

Browse files
author
matdev83
committed
feat: Force reasoning on first turn and add TRACE logging
This commit introduces two main improvements: 1. **Reasoning on First Turn:** The HybridConnector now ensures the reasoning model is always used on the first user turn. This guarantees a better initial response and sets a stronger context for the rest of the conversation. 2. **TRACE Logging Level:** A new TRACE logging level (level 5) has been added for more granular debugging. This is used in the ChatController and HybridConnector to provide deeper insights into request processing and stream conversion. Additionally, the anthropic_converters module has been refactored for improved robustness and maintainability, especially in the stream conversion logic.
1 parent cd4238f commit 28017a9

File tree

9 files changed

+2369
-2068
lines changed

9 files changed

+2369
-2068
lines changed

config/config.example.yaml

Lines changed: 163 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -1,163 +1,163 @@
1-
# Example configuration for LLM Interactive Proxy
2-
3-
# Server settings
4-
host: "0.0.0.0"
5-
port: 8000
6-
proxy_timeout: 120
7-
command_prefix: "!/"
8-
9-
# Authentication
10-
auth:
11-
disable_auth: false # Set to true to disable API key authentication
12-
# NOTE: API keys should NEVER be stored in config files for security reasons
13-
# Instead, set them via environment variables (see README.md)
14-
api_keys: [] # API keys are read from environment variables only
15-
auth_token: null # Optional auth token
16-
brute_force_protection:
17-
enabled: true # Enable automatic blocking for repeated invalid API keys
18-
max_failed_attempts: 5 # Allow this many failures before blocking begins
19-
ttl_seconds: 900 # Window (seconds) for counting failures per IP
20-
initial_block_seconds: 30 # First block duration once threshold is exceeded
21-
block_multiplier: 2.0 # Each subsequent block grows by this multiplier
22-
max_block_seconds: 3600 # Cap the block duration to one hour
23-
24-
# Session management
25-
session:
26-
cleanup_enabled: true
27-
cleanup_interval: 3600 # 1 hour
28-
max_age: 86400 # 1 day
29-
default_interactive_mode: true
30-
force_set_project: false
31-
project_dir_resolution_model: null # Optional BACKEND:MODEL for auto-detecting project directories
32-
33-
# Pytest output compression (reduces verbose test output to preserve context window)
34-
pytest_compression_enabled: true # Default: true
35-
pytest_compression_min_lines: 30 # Only compress output with 30+ lines (default: 30)
36-
37-
# Tool call reactor steering for pytest full-suite runs (requires opt-in)
38-
pytest_full_suite_steering_enabled: false
39-
40-
# Fix improperly formatted <think> tags in model responses
41-
fix_think_tags_enabled: false # Set to true to enable think tags correction
42-
43-
# Planning phase: Route initial requests to a strong model for better planning
44-
planning_phase:
45-
enabled: false # Set to true to enable planning phase
46-
strong_model: "openai:gpt-4" # Strong model for planning (backend:model format)
47-
max_turns: 10 # Maximum turns before switching back to default model
48-
max_file_writes: 1 # Maximum file writes before switching back to default model
49-
50-
# Tool call processing behavior
51-
# These settings control how tool calls are processed to prevent re-processing of historical messages
52-
#
53-
# By default, the system tracks which messages have been processed and skips re-processing
54-
# historical tool calls on subsequent requests. This significantly improves performance
55-
# when dealing with long conversation histories (70+ messages).
56-
#
57-
# force_reprocess_tool_calls: Controls whether to bypass the processing marker checks
58-
# - Default: false (recommended for production)
59-
# - Set to true to force reprocessing of all tool calls
60-
# - Use cases:
61-
# * Debugging tool call processing issues
62-
# * Testing changes to tool call repair logic
63-
# * Investigating unexpected behavior in tool call handling
64-
# - Note: Enabling this will reduce performance with long conversation histories
65-
#
66-
# log_skipped_tool_calls: Controls visibility of skipped message logging
67-
# - Default: false (recommended for production to reduce log noise)
68-
# - Set to true to log when messages are skipped
69-
# - Use cases:
70-
# * Understanding which messages are being optimized
71-
# * Verifying the optimization is working correctly
72-
# * Development and debugging of the processing system
73-
# - Note: Logs are emitted at TRACE level (level 5) to minimize noise
74-
force_reprocess_tool_calls: false
75-
log_skipped_tool_calls: false
76-
77-
# Logging
78-
logging:
79-
level: "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
80-
request_logging: false
81-
response_logging: false
82-
log_file: null # Optional log file path
83-
84-
# Backend settings
85-
# IMPORTANT SECURITY NOTE:
86-
# API keys should NEVER be stored in configuration files!
87-
# All API keys must be set via environment variables only:
88-
# - OPENROUTER_API_KEY for OpenRouter
89-
# - GEMINI_API_KEY for Gemini
90-
# - ANTHROPIC_API_KEY for Anthropic
91-
# - ZAI_API_KEY for ZAI
92-
# - GOOGLE_CLOUD_PROJECT for Google Cloud Project ID
93-
# See README.md and config/sample.env for examples
94-
95-
backends:
96-
default_backend: "openai"
97-
# hybrid_backend_repeat_messages: false # Set to true to repeat reasoning output as an artificial message in the session
98-
99-
openai:
100-
# API key set via OPENROUTER_API_KEY environment variable
101-
api_url: null # Optional custom API URL
102-
timeout: 120
103-
models:
104-
- "gpt-3.5-turbo"
105-
- "gpt-4"
106-
- "gpt-4-turbo"
107-
108-
openrouter:
109-
# API key set via OPENROUTER_API_KEY environment variable
110-
api_url: "https://openrouter.ai/api/v1"
111-
timeout: 180
112-
113-
anthropic:
114-
# API key set via ANTHROPIC_API_KEY environment variable
115-
timeout: 150
116-
117-
gemini:
118-
# GEMINI_API_KEY environment variable
119-
timeout: 120
120-
121-
qwen_oauth:
122-
# API key set via environment variables (OAuth flow)
123-
timeout: 120
124-
extra:
125-
# OAuth credentials configured via environment variables
126-
client_id: null
127-
client_secret: null
128-
129-
zai:
130-
# API key set via ZAI_API_KEY environment variable
131-
timeout: 120
132-
133-
# Model-specific defaults
134-
model_defaults:
135-
"gpt-4": # Exact model name
136-
temperature: 0.7
137-
138-
"openrouter:claude-3-opus": # Backend:model format
139-
reasoning_effort: "high"
140-
141-
# Failover routes
142-
failover_routes:
143-
default:
144-
policy: "ordered"
145-
elements:
146-
- "openai:gpt-4"
147-
- "openrouter:anthropic/claude-3-opus-20240229"
148-
149-
# Model name rewrite rules (optional)
150-
# These rules allow you to dynamically rewrite model names before they are processed
151-
# Rules are processed in order, and the first matching rule is applied
152-
# model_aliases:
153-
# # Statically replace a specific model
154-
# - pattern: "^claude-3-sonnet-20240229$"
155-
# replacement: "gemini-oauth-plan:gemini-1.5-flash"
156-
#
157-
# # Dynamically replace any GPT model, keeping the version
158-
# - pattern: "^gpt-(.*)"
159-
# replacement: "openrouter:openai/gpt-\\1"
160-
#
161-
# # Catch-all for any other model
162-
# - pattern: "^(.*)$"
163-
# replacement: "gemini-oauth-plan:gemini-1.5-pro"
1+
# Example configuration for LLM Interactive Proxy
2+
3+
# Server settings
4+
host: "0.0.0.0"
5+
port: 8000
6+
proxy_timeout: 120
7+
command_prefix: "!/"
8+
9+
# Authentication
10+
auth:
11+
disable_auth: false # Set to true to disable API key authentication
12+
# NOTE: API keys should NEVER be stored in config files for security reasons
13+
# Instead, set them via environment variables (see README.md)
14+
api_keys: [] # API keys are read from environment variables only
15+
auth_token: null # Optional auth token
16+
brute_force_protection:
17+
enabled: true # Enable automatic blocking for repeated invalid API keys
18+
max_failed_attempts: 5 # Allow this many failures before blocking begins
19+
ttl_seconds: 900 # Window (seconds) for counting failures per IP
20+
initial_block_seconds: 30 # First block duration once threshold is exceeded
21+
block_multiplier: 2.0 # Each subsequent block grows by this multiplier
22+
max_block_seconds: 3600 # Cap the block duration to one hour
23+
24+
# Session management
25+
session:
26+
cleanup_enabled: true
27+
cleanup_interval: 3600 # 1 hour
28+
max_age: 86400 # 1 day
29+
default_interactive_mode: true
30+
force_set_project: false
31+
project_dir_resolution_model: null # Optional BACKEND:MODEL for auto-detecting project directories
32+
33+
# Pytest output compression (reduces verbose test output to preserve context window)
34+
pytest_compression_enabled: true # Default: true
35+
pytest_compression_min_lines: 30 # Only compress output with 30+ lines (default: 30)
36+
37+
# Tool call reactor steering for pytest full-suite runs (requires opt-in)
38+
pytest_full_suite_steering_enabled: false
39+
40+
# Fix improperly formatted <think> tags in model responses
41+
fix_think_tags_enabled: false # Set to true to enable think tags correction
42+
43+
# Planning phase: Route initial requests to a strong model for better planning
44+
planning_phase:
45+
enabled: false # Set to true to enable planning phase
46+
strong_model: "openai:gpt-4" # Strong model for planning (backend:model format)
47+
max_turns: 10 # Maximum turns before switching back to default model
48+
max_file_writes: 1 # Maximum file writes before switching back to default model
49+
50+
# Tool call processing behavior
51+
# These settings control how tool calls are processed to prevent re-processing of historical messages
52+
#
53+
# By default, the system tracks which messages have been processed and skips re-processing
54+
# historical tool calls on subsequent requests. This significantly improves performance
55+
# when dealing with long conversation histories (70+ messages).
56+
#
57+
# force_reprocess_tool_calls: Controls whether to bypass the processing marker checks
58+
# - Default: false (recommended for production)
59+
# - Set to true to force reprocessing of all tool calls
60+
# - Use cases:
61+
# * Debugging tool call processing issues
62+
# * Testing changes to tool call repair logic
63+
# * Investigating unexpected behavior in tool call handling
64+
# - Note: Enabling this will reduce performance with long conversation histories
65+
#
66+
# log_skipped_tool_calls: Controls visibility of skipped message logging
67+
# - Default: false (recommended for production to reduce log noise)
68+
# - Set to true to log when messages are skipped
69+
# - Use cases:
70+
# * Understanding which messages are being optimized
71+
# * Verifying the optimization is working correctly
72+
# * Development and debugging of the processing system
73+
# - Note: Logs are emitted at TRACE level (level 5) to minimize noise
74+
force_reprocess_tool_calls: false
75+
log_skipped_tool_calls: false
76+
77+
# Logging
78+
logging:
79+
level: "INFO" # TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL
80+
request_logging: false
81+
response_logging: false
82+
log_file: null # Optional log file path
83+
84+
# Backend settings
85+
# IMPORTANT SECURITY NOTE:
86+
# API keys should NEVER be stored in configuration files!
87+
# All API keys must be set via environment variables only:
88+
# - OPENROUTER_API_KEY for OpenRouter
89+
# - GEMINI_API_KEY for Gemini
90+
# - ANTHROPIC_API_KEY for Anthropic
91+
# - ZAI_API_KEY for ZAI
92+
# - GOOGLE_CLOUD_PROJECT for Google Cloud Project ID
93+
# See README.md and config/sample.env for examples
94+
95+
backends:
96+
default_backend: "openai"
97+
# hybrid_backend_repeat_messages: false # Set to true to repeat reasoning output as an artificial message in the session
98+
99+
openai:
100+
# API key set via OPENROUTER_API_KEY environment variable
101+
api_url: null # Optional custom API URL
102+
timeout: 120
103+
models:
104+
- "gpt-3.5-turbo"
105+
- "gpt-4"
106+
- "gpt-4-turbo"
107+
108+
openrouter:
109+
# API key set via OPENROUTER_API_KEY environment variable
110+
api_url: "https://openrouter.ai/api/v1"
111+
timeout: 180
112+
113+
anthropic:
114+
# API key set via ANTHROPIC_API_KEY environment variable
115+
timeout: 150
116+
117+
gemini:
118+
# GEMINI_API_KEY environment variable
119+
timeout: 120
120+
121+
qwen_oauth:
122+
# API key set via environment variables (OAuth flow)
123+
timeout: 120
124+
extra:
125+
# OAuth credentials configured via environment variables
126+
client_id: null
127+
client_secret: null
128+
129+
zai:
130+
# API key set via ZAI_API_KEY environment variable
131+
timeout: 120
132+
133+
# Model-specific defaults
134+
model_defaults:
135+
"gpt-4": # Exact model name
136+
temperature: 0.7
137+
138+
"openrouter:claude-3-opus": # Backend:model format
139+
reasoning_effort: "high"
140+
141+
# Failover routes
142+
failover_routes:
143+
default:
144+
policy: "ordered"
145+
elements:
146+
- "openai:gpt-4"
147+
- "openrouter:anthropic/claude-3-opus-20240229"
148+
149+
# Model name rewrite rules (optional)
150+
# These rules allow you to dynamically rewrite model names before they are processed
151+
# Rules are processed in order, and the first matching rule is applied
152+
# model_aliases:
153+
# # Statically replace a specific model
154+
# - pattern: "^claude-3-sonnet-20240229$"
155+
# replacement: "gemini-oauth-plan:gemini-1.5-flash"
156+
#
157+
# # Dynamically replace any GPT model, keeping the version
158+
# - pattern: "^gpt-(.*)"
159+
# replacement: "openrouter:openai/gpt-\\1"
160+
#
161+
# # Catch-all for any other model
162+
# - pattern: "^(.*)$"
163+
# replacement: "gemini-oauth-plan:gemini-1.5-pro"

data/test_suite_state.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"test_count": 5117,
2+
"test_count": 5119,
33
"last_updated": "1762168167.0802596"
44
}

0 commit comments

Comments
 (0)