Skip to content

Commit 05ea2a6

Browse files
olesho and claude committed
Refactor config.yml to use example files for different providers
Created separate example config files for different model providers:
- config.example.openai.yml: OpenAI models (now default)
- config.example.openrouter-gemini.yml: OpenRouter with Gemini
- config.example.openrouter-gpt.yml: OpenRouter with GPT

Main config.yml now defaults to OpenAI provider for reliability.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 1fb0bc1 commit 05ea2a6

File tree

4 files changed

+190
-41
lines changed

4 files changed

+190
-41
lines changed

evals/config.example.openai.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Evaluation Framework Configuration
2+
# This configuration is shared across all evaluation runner scripts
3+
# Example configuration for OpenAI models
4+
5+
# API endpoint for the evaluation server
6+
api_endpoint: "http://localhost:8080"
7+
8+
# Model configurations for running evaluations
9+
# These models are sent to the agent for processing requests
10+
11+
main_model:
12+
provider: "openai"
13+
model_name: "gpt-5-mini"
14+
api_key: "${OPENAI_API_KEY}"
15+
16+
mini_model:
17+
provider: "openai"
18+
model_name: "gpt-5-nano"
19+
api_key: "${OPENAI_API_KEY}"
20+
21+
nano_model:
22+
provider: "openai"
23+
model_name: "gpt-5-nano"
24+
api_key: "${OPENAI_API_KEY}"
25+
26+
# Model configuration for judging evaluation responses
27+
# This model is used locally to assess the quality of agent responses
28+
29+
judge_model:
30+
provider: "openai"
31+
model_name: "gpt-5"
32+
api_key: "${OPENAI_API_KEY}"
33+
# temperature: 0.1 # GPT-5 doesn't support custom temperature
34+
35+
# Execution settings
36+
37+
execution:
38+
# Default number of evaluations to run per script execution
39+
default_limit: 20
40+
41+
# Timeout for API requests (seconds) - set to max for slow custom API
42+
timeout: 3600
43+
44+
# Number of concurrent evaluation requests
45+
concurrent_requests: 1
46+
47+
# Delay between requests (seconds)
48+
request_delay: 1
49+
50+
# Reporting settings
51+
52+
reporting:
53+
# Directory for storing evaluation reports
54+
reports_dir: "reports"
55+
56+
# Report format
57+
format: "csv"
58+
59+
# Include detailed judge reasoning in reports
60+
include_reasoning: true
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Evaluation Framework Configuration
2+
# This configuration is shared across all evaluation runner scripts
3+
# Example configuration for OpenRouter with Google Gemini models
4+
5+
# API endpoint for the evaluation server
6+
api_endpoint: "http://localhost:8080"
7+
8+
# Model configurations for running evaluations
9+
# These models are sent to the agent for processing requests
10+
11+
main_model:
12+
provider: "openrouter"
13+
model_name: "google/gemini-2.0-flash-exp:free"
14+
api_key: "${OPENROUTER_API_KEY}"
15+
16+
mini_model:
17+
provider: "openrouter"
18+
model_name: "google/gemini-2.0-flash-exp:free"
19+
api_key: "${OPENROUTER_API_KEY}"
20+
21+
nano_model:
22+
provider: "openrouter"
23+
model_name: "google/gemini-2.0-flash-exp:free"
24+
api_key: "${OPENROUTER_API_KEY}"
25+
26+
# Model configuration for judging evaluation responses
27+
# This model is used locally to assess the quality of agent responses
28+
29+
judge_model:
30+
provider: "openai"
31+
model_name: "gpt-5"
32+
api_key: "${OPENAI_API_KEY}"
33+
# temperature: 0.1 # GPT-5 doesn't support custom temperature
34+
35+
# Execution settings
36+
37+
execution:
38+
# Default number of evaluations to run per script execution
39+
default_limit: 20
40+
41+
# Timeout for API requests (seconds) - set to max for slow custom API
42+
timeout: 3600
43+
44+
# Number of concurrent evaluation requests
45+
concurrent_requests: 1
46+
47+
# Delay between requests (seconds)
48+
request_delay: 1
49+
50+
# Reporting settings
51+
52+
reporting:
53+
# Directory for storing evaluation reports
54+
reports_dir: "reports"
55+
56+
# Report format
57+
format: "csv"
58+
59+
# Include detailed judge reasoning in reports
60+
include_reasoning: true
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Evaluation Framework Configuration
2+
# This configuration is shared across all evaluation runner scripts
3+
# Example configuration for OpenRouter with GPT models
4+
5+
# API endpoint for the evaluation server
6+
api_endpoint: "http://localhost:8080"
7+
8+
# Model configurations for running evaluations
9+
# These models are sent to the agent for processing requests
10+
11+
main_model:
12+
provider: "openrouter"
13+
model_name: "openai/gpt-oss-20b:free"
14+
api_key: "${OPENROUTER_API_KEY}"
15+
16+
mini_model:
17+
provider: "openrouter"
18+
model_name: "openai/gpt-oss-20b:free"
19+
api_key: "${OPENROUTER_API_KEY}"
20+
21+
nano_model:
22+
provider: "openrouter"
23+
model_name: "openai/gpt-oss-20b:free"
24+
api_key: "${OPENROUTER_API_KEY}"
25+
26+
# Model configuration for judging evaluation responses
27+
# This model is used locally to assess the quality of agent responses
28+
29+
judge_model:
30+
provider: "openai"
31+
model_name: "gpt-5"
32+
api_key: "${OPENAI_API_KEY}"
33+
# temperature: 0.1 # GPT-5 doesn't support custom temperature
34+
35+
# Execution settings
36+
37+
execution:
38+
# Default number of evaluations to run per script execution
39+
default_limit: 20
40+
41+
# Timeout for API requests (seconds) - set to max for slow custom API
42+
timeout: 3600
43+
44+
# Number of concurrent evaluation requests
45+
concurrent_requests: 1
46+
47+
# Delay between requests (seconds)
48+
request_delay: 1
49+
50+
# Reporting settings
51+
52+
reporting:
53+
# Directory for storing evaluation reports
54+
reports_dir: "reports"
55+
56+
# Report format
57+
format: "csv"
58+
59+
# Include detailed judge reasoning in reports
60+
include_reasoning: true

evals/config.yml

Lines changed: 10 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,22 @@ api_endpoint: "http://localhost:8080"
66

77
# Model configurations for running evaluations
88
# These models are sent to the agent for processing requests
9-
10-
# main_model:
11-
# provider: "openai"
12-
# model_name: "gpt-5-mini"
13-
# api_key: "${OPENAI_API_KEY}"
14-
15-
# mini_model:
16-
# provider: "openai"
17-
# model_name: "gpt-5-nano"
18-
# api_key: "${OPENAI_API_KEY}"
19-
20-
# nano_model:
21-
# provider: "openai"
22-
# model_name: "gpt-5-nano"
23-
# api_key: "${OPENAI_API_KEY}"
24-
25-
# main_model:
26-
# provider: "openrouter"
27-
# model_name: "openai/gpt-oss-20b:free"
28-
# # model_name: "tngtech/deepseek-r1t2-chimera:free"
29-
# api_key: "${OPENROUTER_API_KEY}"
30-
31-
# mini_model:
32-
# provider: "openrouter"
33-
# model_name: "openai/gpt-oss-20b:free"
34-
# api_key: "${OPENROUTER_API_KEY}"
35-
36-
# nano_model:
37-
# provider: "openrouter"
38-
# model_name: "openai/gpt-oss-20b:free"
39-
# api_key: "${OPENROUTER_API_KEY}"
9+
# See config.example.*.yml files for other provider/model configurations
4010

4111
main_model:
42-
provider: "openrouter"
43-
model_name: "google/gemini-2.0-flash-exp:free"
44-
# model_name: "tngtech/deepseek-r1t2-chimera:free"
45-
api_key: "${OPENROUTER_API_KEY}"
12+
provider: "openai"
13+
model_name: "gpt-5-mini"
14+
api_key: "${OPENAI_API_KEY}"
4615

4716
mini_model:
48-
provider: "openrouter"
49-
model_name: "google/gemini-2.0-flash-exp:free"
50-
api_key: "${OPENROUTER_API_KEY}"
17+
provider: "openai"
18+
model_name: "gpt-5-nano"
19+
api_key: "${OPENAI_API_KEY}"
5120

5221
nano_model:
53-
provider: "openrouter"
54-
model_name: "google/gemini-2.0-flash-exp:free"
55-
api_key: "${OPENROUTER_API_KEY}"
22+
provider: "openai"
23+
model_name: "gpt-5-nano"
24+
api_key: "${OPENAI_API_KEY}"
5625

5726
# Model configuration for judging evaluation responses
5827
# This model is used locally to assess the quality of agent responses

0 commit comments

Comments (0)