# Example configuration for LLM Interactive Proxy

# Server settings
host: "0.0.0.0"
port: 8000
proxy_timeout: 120
command_prefix: "!/"
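# Example: with this prefix, an in-chat command would look like "!/help"
# (hypothetical command name; see README.md for the actual command set).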

# Authentication
auth:
  disable_auth: false # Set to true to disable API key authentication
  # NOTE: API keys should NEVER be stored in config files for security reasons
  # Instead, set them via environment variables (see README.md)
  api_keys: [] # API keys are read from environment variables only
  auth_token: null # Optional auth token
  brute_force_protection:
    enabled: true # Enable automatic blocking for repeated invalid API keys
    max_failed_attempts: 5 # Allow this many failures before blocking begins
    ttl_seconds: 900 # Window (seconds) for counting failures per IP
    initial_block_seconds: 30 # First block duration once threshold is exceeded
    block_multiplier: 2.0 # Each subsequent block grows by this multiplier
    max_block_seconds: 3600 # Cap the block duration to one hour
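    # Illustrative progression with these defaults (assuming exponential
    # backoff as described above): 30s, 60s, 120s, ... capped at 3600s.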

# Session management
session:
  cleanup_enabled: true
  cleanup_interval: 3600 # 1 hour
  max_age: 86400 # 1 day
  default_interactive_mode: true
  force_set_project: false
  project_dir_resolution_model: null # Optional BACKEND:MODEL for auto-detecting project directories
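  # e.g. project_dir_resolution_model: "openai:gpt-4" (illustrative value)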

  # Pytest output compression (reduces verbose test output to preserve context window)
  pytest_compression_enabled: true # Default: true
  pytest_compression_min_lines: 30 # Only compress output with 30+ lines (default: 30)

  # Tool call reactor steering for pytest full-suite runs (requires opt-in)
  pytest_full_suite_steering_enabled: false

  # Fix improperly formatted <think> tags in model responses
  fix_think_tags_enabled: false # Set to true to enable think-tag correction

  # Planning phase: route initial requests to a strong model for better planning
  planning_phase:
    enabled: false # Set to true to enable the planning phase
    strong_model: "openai:gpt-4" # Strong model for planning (backend:model format)
    max_turns: 10 # Maximum turns before switching back to the default model
    max_file_writes: 1 # Maximum file writes before switching back to the default model
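    # With these defaults, the first 10 turns (or the first file write,
    # whichever limit is reached first) go to openai:gpt-4, after which the
    # session returns to its default model.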

  # Tool call processing behavior
  # These settings control how tool calls are processed to prevent re-processing of historical messages
  #
  # By default, the system tracks which messages have been processed and skips re-processing
  # historical tool calls on subsequent requests. This significantly improves performance
  # when dealing with long conversation histories (70+ messages).
  #
  # force_reprocess_tool_calls: Controls whether to bypass the processing marker checks
  # - Default: false (recommended for production)
  # - Set to true to force reprocessing of all tool calls
  # - Use cases:
  #   * Debugging tool call processing issues
  #   * Testing changes to tool call repair logic
  #   * Investigating unexpected behavior in tool call handling
  # - Note: Enabling this will reduce performance with long conversation histories
  #
  # log_skipped_tool_calls: Controls visibility of skipped message logging
  # - Default: false (recommended for production to reduce log noise)
  # - Set to true to log when messages are skipped
  # - Use cases:
  #   * Understanding which messages are being optimized
  #   * Verifying the optimization is working correctly
  #   * Development and debugging of the processing system
  # - Note: Logs are emitted at TRACE level (level 5) to minimize noise
  force_reprocess_tool_calls: false
  log_skipped_tool_calls: false

# Logging
logging:
  level: "INFO" # TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL
  request_logging: false
  response_logging: false
  log_file: null # Optional log file path

# Backend settings
# IMPORTANT SECURITY NOTE:
# API keys should NEVER be stored in configuration files!
# All API keys must be set via environment variables only:
# - OPENAI_API_KEY for OpenAI
# - OPENROUTER_API_KEY for OpenRouter
# - GEMINI_API_KEY for Gemini
# - ANTHROPIC_API_KEY for Anthropic
# - ZAI_API_KEY for ZAI
# - GOOGLE_CLOUD_PROJECT for Google Cloud Project ID
# See README.md and config/sample.env for examples
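# For example, set them in your shell before starting the proxy
# (placeholder values):
#   export OPENAI_API_KEY="your-openai-key"
#   export OPENROUTER_API_KEY="your-openrouter-key"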

backends:
  default_backend: "openai"
  # hybrid_backend_repeat_messages: false # Set to true to repeat reasoning output as an artificial message in the session

  openai:
    # API key set via OPENAI_API_KEY environment variable
    api_url: null # Optional custom API URL
    timeout: 120
    models:
      - "gpt-3.5-turbo"
      - "gpt-4"
      - "gpt-4-turbo"

  openrouter:
    # API key set via OPENROUTER_API_KEY environment variable
    api_url: "https://openrouter.ai/api/v1"
    timeout: 180

  anthropic:
    # API key set via ANTHROPIC_API_KEY environment variable
    timeout: 150

  gemini:
    # API key set via GEMINI_API_KEY environment variable
    timeout: 120

  qwen_oauth:
    # API key set via environment variables (OAuth flow)
    timeout: 120
    extra:
      # OAuth credentials configured via environment variables
      client_id: null
      client_secret: null

  zai:
    # API key set via ZAI_API_KEY environment variable
    timeout: 120

# Model-specific defaults
model_defaults:
  "gpt-4": # Exact model name
    temperature: 0.7

  "openrouter:claude-3-opus": # Backend:model format
    reasoning_effort: "high"

# Failover routes
failover_routes:
  default:
    policy: "ordered"
    elements:
      - "openai:gpt-4"
      - "openrouter:anthropic/claude-3-opus-20240229"
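  # "ordered" tries the elements from top to bottom, failing over to the next
  # entry when a backend call fails. Additional named routes can presumably be
  # defined alongside "default", e.g. (hypothetical route name):
  # planning:
  #   policy: "ordered"
  #   elements:
  #     - "openrouter:claude-3-opus"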

# Model name rewrite rules (optional)
# These rules allow you to dynamically rewrite model names before they are processed.
# Rules are processed in order, and the first matching rule is applied.
# model_aliases:
#   # Statically replace a specific model
#   - pattern: "^claude-3-sonnet-20240229$"
#     replacement: "gemini-oauth-plan:gemini-1.5-flash"
#
#   # Dynamically replace any GPT model, keeping the version
#   - pattern: "^gpt-(.*)"
#     replacement: "openrouter:openai/gpt-\\1"
#
#   # Catch-all for any other model
#   - pattern: "^(.*)$"
#     replacement: "gemini-oauth-plan:gemini-1.5-pro"
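# Worked example: with the rules above, "claude-3-sonnet-20240229" is rewritten
# to "gemini-oauth-plan:gemini-1.5-flash"; a hypothetical "gpt-4o" matches the
# second pattern and becomes "openrouter:openai/gpt-4o"; any other name falls
# through to the catch-all and becomes "gemini-oauth-plan:gemini-1.5-pro".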