Skip to content

Commit 1bda7fc

Browse files
committed
fixes
1 parent 26a4609 commit 1bda7fc

File tree

2 files changed

+299
-2
lines changed

2 files changed

+299
-2
lines changed

openevolve/prompt/sampler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def _format_evolution_history(
402402
combined_programs_str = top_programs_str + diverse_programs_str
403403

404404
# Format inspirations section
405-
inspirations_section_str = self._format_inspirations_section(inspirations, language)
405+
inspirations_section_str = self._format_inspirations_section(inspirations, language, feature_dimensions)
406406

407407
# Combine into full history
408408
return history_template.format(
@@ -412,7 +412,7 @@ def _format_evolution_history(
412412
)
413413

414414
def _format_inspirations_section(
415-
self, inspirations: List[Dict[str, Any]], language: str
415+
self, inspirations: List[Dict[str, Any]], language: str, feature_dimensions: Optional[List[str]] = None
416416
) -> str:
417417
"""
418418
Format the inspirations section for the prompt
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
"""
2+
Comprehensive tests for PromptSampler including inspirations and feature_dimensions
3+
"""
4+
5+
import unittest
6+
from unittest.mock import MagicMock, patch
7+
from openevolve.config import Config
8+
from openevolve.prompt.sampler import PromptSampler
9+
10+
11+
class TestPromptSamplerComprehensive(unittest.TestCase):
    """Edge-case coverage for PromptSampler: inspiration-program handling and
    feature_dimensions plumbing through prompt construction."""

    def setUp(self):
        """Build a sampler from a default Config carrying two feature dimensions."""
        cfg = Config()
        # Feature dimensions act as MAP-Elites axes; the sampler is expected to
        # exclude them when it reports fitness.
        cfg.database.feature_dimensions = ["complexity", "memory_usage"]
        self.prompt_sampler = PromptSampler(cfg.prompt)
        self.feature_dimensions = cfg.database.feature_dimensions

    def test_build_prompt_with_inspirations(self):
        """A prompt built with inspiration programs embeds their code and scores."""
        metrics = {
            "combined_score": 0.85,
            "accuracy": 0.9,
            "speed": 0.8,
            "complexity": 5,
            "memory_usage": 100,
        }
        insp = [
            {
                "id": "insp1",
                "code": "def fast_implementation(): pass",
                "metrics": {
                    "combined_score": 0.75,
                    "accuracy": 0.7,
                    "speed": 0.95,
                    "complexity": 3,
                    "memory_usage": 50,
                },
                "metadata": {"diverse": True},
            },
            {
                "id": "insp2",
                "code": "def memory_efficient(): pass",
                "metrics": {
                    "combined_score": 0.65,
                    "accuracy": 0.8,
                    "speed": 0.5,
                    "complexity": 7,
                    "memory_usage": 20,
                },
                "metadata": {"migrant": True},
            },
        ]

        prompt = self.prompt_sampler.build_prompt(
            current_program="def optimized(): pass",
            parent_program="def original(): pass",
            program_metrics=metrics,
            inspirations=insp,
            feature_dimensions=self.feature_dimensions,
        )

        # Both message roles exist and each inspiration's code appears.
        self.assertIn("system", prompt)
        self.assertIn("user", prompt)
        self.assertIn("fast_implementation", prompt["user"])
        self.assertIn("memory_efficient", prompt["user"])
        # The fitness shown per inspiration is its combined_score alone, not an
        # average polluted by the feature dimensions.
        self.assertIn("0.75", prompt["user"])
        self.assertIn("0.65", prompt["user"])

    def test_format_inspirations_section_with_feature_dimensions(self):
        """_format_inspirations_section accepts an explicit feature_dimensions list."""
        insp = [
            {
                "id": "test1",
                "code": "def test_func(): return 42",
                "metrics": {
                    "combined_score": 0.9,
                    "accuracy": 0.95,
                    "complexity": 10,  # feature axis, excluded from fitness
                    "memory_usage": 200,  # feature axis, excluded from fitness
                },
                "metadata": {"diverse": True},
            }
        ]

        section = self.prompt_sampler._format_inspirations_section(
            insp, "python", feature_dimensions=["complexity", "memory_usage"]
        )

        # Regression guard: this call path previously raised NameError.
        self.assertIsInstance(section, str)
        self.assertIn("test_func", section)
        self.assertIn("0.9000", section)

    def test_format_inspirations_section_without_feature_dimensions(self):
        """Omitting feature_dimensions (default None) still renders the section."""
        insp = [
            {
                "id": "test2",
                "code": "def another_func(): pass",
                "metrics": {"score": 0.7, "time": 1.2},
                "metadata": {},
            }
        ]

        section = self.prompt_sampler._format_inspirations_section(insp, "python")

        self.assertIsInstance(section, str)
        self.assertIn("another_func", section)

    def test_determine_program_type_with_feature_dimensions(self):
        """_determine_program_type classifies by combined_score with features given."""
        program = {
            "metrics": {"combined_score": 0.85, "complexity": 5, "memory_usage": 100},
            "metadata": {},
        }

        kind = self.prompt_sampler._determine_program_type(
            program, feature_dimensions=["complexity", "memory_usage"]
        )

        # A combined_score of 0.85 marks the program as a high performer.
        self.assertEqual(kind, "High-Performer")

    def test_extract_unique_features_calls_determine_program_type(self):
        """_extract_unique_features delegates to _determine_program_type cleanly."""
        program = {"code": "", "metrics": {"score": 0.5}, "metadata": {}}

        # Empty code forces the default feature description; must not NameError.
        features = self.prompt_sampler._extract_unique_features(program)

        self.assertIsInstance(features, str)
        self.assertIn("approach to the problem", features)

    def test_build_prompt_with_all_optional_parameters(self):
        """build_prompt copes with every optional argument supplied at once."""
        prompt = self.prompt_sampler.build_prompt(
            current_program="def main(): pass",
            parent_program="def parent(): pass",
            program_metrics={"combined_score": 0.7, "feature1": 10},
            previous_programs=[
                {"id": "prev1", "code": "def v1(): pass", "metrics": {"score": 0.3}}
            ],
            top_programs=[
                {"id": "top1", "code": "def best(): pass", "metrics": {"combined_score": 0.95}}
            ],
            inspirations=[
                {"id": "insp1", "code": "def creative(): pass", "metrics": {"score": 0.6}}
            ],
            language="python",
            evolution_round=5,
            diff_based_evolution=True,
            feature_dimensions=["feature1"],
            program_artifacts={"output": "test output"},
        )

        self.assertIn("system", prompt)
        self.assertIn("user", prompt)
        # Every program source fed in should surface in the user message.
        for snippet in ("main", "best", "creative"):
            self.assertIn(snippet, prompt["user"])

    def test_fitness_calculation_consistency(self):
        """Fitness in the prompt is combined_score, never a feature-inclusive mean."""
        metrics = {
            "combined_score": 0.8,
            "accuracy": 0.9,
            "speed": 0.7,
            "complexity": 5,  # feature dimension
            "memory_usage": 100,  # feature dimension
        }

        prompt = self.prompt_sampler.build_prompt(
            current_program="def test(): pass",
            program_metrics=metrics,
            inspirations=[{"id": "i1", "code": "pass", "metrics": metrics}],
            feature_dimensions=["complexity", "memory_usage"],
        )

        self.assertIn("0.8000", prompt["user"])

    def test_empty_inspirations_list(self):
        """An empty inspirations list builds a prompt without errors."""
        prompt = self.prompt_sampler.build_prompt(
            current_program="def empty(): pass",
            inspirations=[],
            feature_dimensions=["test_feature"],
        )

        self.assertIn("system", prompt)
        self.assertIn("user", prompt)

    def test_inspirations_with_missing_metrics(self):
        """Inspirations with empty or absent metrics are tolerated gracefully."""
        insp = [
            {"id": "bad1", "code": "def bad(): pass", "metrics": {}},
            {"id": "bad2", "code": "def worse(): pass"},  # no metrics key at all
        ]

        section = self.prompt_sampler._format_inspirations_section(
            insp, "python", feature_dimensions=["test"]
        )

        self.assertIsInstance(section, str)

    def test_feature_dimensions_none_vs_empty_list(self):
        """None and [] for feature_dimensions yield the same classification."""
        program = {"metrics": {"score": 0.5}}

        self.assertEqual(
            self.prompt_sampler._determine_program_type(program, None),
            self.prompt_sampler._determine_program_type(program, []),
        )

    def test_feature_coordinates_formatting_in_prompt(self):
        """Every named feature dimension shows up in the rendered user message."""
        prompt = self.prompt_sampler.build_prompt(
            current_program="def test(): pass",
            program_metrics={
                "combined_score": 0.75,
                "complexity": 8,
                "memory_usage": 150,
                "cpu_usage": 0.3,
            },
            feature_dimensions=["complexity", "memory_usage", "cpu_usage"],
        )

        user_msg = prompt["user"]
        for dim in ("complexity", "memory_usage", "cpu_usage"):
            self.assertIn(dim, user_msg)
if __name__ == "__main__":
    # Allow running this test module directly without a pytest/unittest runner.
    unittest.main()

0 commit comments

Comments
 (0)