1+ """
2+ Comprehensive tests for PromptSampler including inspirations and feature_dimensions
3+ """
4+
5+ import unittest
6+ from unittest .mock import MagicMock , patch
7+ from openevolve .config import Config
8+ from openevolve .prompt .sampler import PromptSampler
9+
10+
11+ class TestPromptSamplerComprehensive (unittest .TestCase ):
12+ """Comprehensive tests for prompt sampler edge cases"""
13+
    def setUp(self):
        """Set up test prompt sampler"""
        config = Config()
        # Add feature dimensions to config for testing
        config.database.feature_dimensions = ["complexity", "memory_usage"]
        self.prompt_sampler = PromptSampler(config.prompt)
        self.feature_dimensions = config.database.feature_dimensions

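    # Note on the interface exercised here (inferred from these tests, not from
    # separate documentation): build_prompt(...) returns a dict with "system" and
    # "user" messages; the private helpers _format_inspirations_section,
    # _determine_program_type, and _extract_unique_features are also called
    # directly in some tests below.
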
    def test_build_prompt_with_inspirations(self):
        """Test building a prompt with inspiration programs"""
        current_program = "def optimized(): pass"
        parent_program = "def original(): pass"
        program_metrics = {
            "combined_score": 0.85,
            "accuracy": 0.9,
            "speed": 0.8,
            "complexity": 5,
            "memory_usage": 100,
        }

        # Create inspirations with diverse characteristics
        inspirations = [
            {
                "id": "insp1",
                "code": "def fast_implementation(): pass",
                "metrics": {
                    "combined_score": 0.75,
                    "accuracy": 0.7,
                    "speed": 0.95,
                    "complexity": 3,
                    "memory_usage": 50,
                },
                "metadata": {"diverse": True},
            },
            {
                "id": "insp2",
                "code": "def memory_efficient(): pass",
                "metrics": {
                    "combined_score": 0.65,
                    "accuracy": 0.8,
                    "speed": 0.5,
                    "complexity": 7,
                    "memory_usage": 20,
                },
                "metadata": {"migrant": True},
            },
        ]

        # Build prompt with inspirations and feature_dimensions
        prompt = self.prompt_sampler.build_prompt(
            current_program=current_program,
            parent_program=parent_program,
            program_metrics=program_metrics,
            inspirations=inspirations,
            feature_dimensions=self.feature_dimensions,
        )

        # Verify prompt was built successfully
        self.assertIn("system", prompt)
        self.assertIn("user", prompt)

        # Check that inspirations are included
        self.assertIn("fast_implementation", prompt["user"])
        self.assertIn("memory_efficient", prompt["user"])

        # Verify fitness scores are calculated correctly (excluding feature dimensions)
        # The inspirations should show their fitness scores, not including complexity/memory_usage
        self.assertIn("0.75", prompt["user"])  # insp1's combined_score
        self.assertIn("0.65", prompt["user"])  # insp2's combined_score

    def test_format_inspirations_section_with_feature_dimensions(self):
        """Test _format_inspirations_section directly with feature_dimensions"""
        inspirations = [
            {
                "id": "test1",
                "code": "def test_func(): return 42",
                "metrics": {
                    "combined_score": 0.9,
                    "accuracy": 0.95,
                    "complexity": 10,  # Feature dimension
                    "memory_usage": 200,  # Feature dimension
                },
                "metadata": {"diverse": True},
            }
        ]

        # Call the method directly
        result = self.prompt_sampler._format_inspirations_section(
            inspirations,
            "python",
            feature_dimensions=["complexity", "memory_usage"],
        )

        # Should not raise NameError
        self.assertIsInstance(result, str)
        self.assertIn("test_func", result)
        self.assertIn("0.9000", result)  # The fitness score

    def test_format_inspirations_section_without_feature_dimensions(self):
        """Test _format_inspirations_section works without feature_dimensions"""
        inspirations = [
            {
                "id": "test2",
                "code": "def another_func(): pass",
                "metrics": {"score": 0.7, "time": 1.2},
                "metadata": {},
            }
        ]

        # Call without feature_dimensions (should use default of None)
        result = self.prompt_sampler._format_inspirations_section(
            inspirations,
            "python",
        )

        self.assertIsInstance(result, str)
        self.assertIn("another_func", result)

    def test_determine_program_type_with_feature_dimensions(self):
        """Test _determine_program_type with feature_dimensions parameter"""
        program = {
            "metrics": {
                "combined_score": 0.85,
                "complexity": 5,
                "memory_usage": 100,
            },
            "metadata": {},
        }

        # Test with feature_dimensions
        program_type = self.prompt_sampler._determine_program_type(
            program,
            feature_dimensions=["complexity", "memory_usage"],
        )

        self.assertEqual(program_type, "High-Performer")  # Based on combined_score of 0.85

    def test_extract_unique_features_calls_determine_program_type(self):
        """Test that _extract_unique_features correctly handles program_type determination"""
        program = {
            "code": "",  # Empty code to trigger default features
            "metrics": {"score": 0.5},
            "metadata": {},
        }

        # This should not raise NameError when calling _determine_program_type
        features = self.prompt_sampler._extract_unique_features(program)

        self.assertIsInstance(features, str)
        self.assertIn("approach to the problem", features)

    def test_build_prompt_with_all_optional_parameters(self):
        """Test build_prompt with all optional parameters including inspirations"""
        current_program = "def main(): pass"

        # Comprehensive test data
        previous_programs = [
            {"id": "prev1", "code": "def v1(): pass", "metrics": {"score": 0.3}}
        ]
        top_programs = [
            {"id": "top1", "code": "def best(): pass", "metrics": {"combined_score": 0.95}}
        ]
        inspirations = [
            {"id": "insp1", "code": "def creative(): pass", "metrics": {"score": 0.6}}
        ]

        prompt = self.prompt_sampler.build_prompt(
            current_program=current_program,
            parent_program="def parent(): pass",
            program_metrics={"combined_score": 0.7, "feature1": 10},
            previous_programs=previous_programs,
            top_programs=top_programs,
            inspirations=inspirations,
            language="python",
            evolution_round=5,
            diff_based_evolution=True,
            feature_dimensions=["feature1"],
            program_artifacts={"output": "test output"},
        )

        self.assertIn("system", prompt)
        self.assertIn("user", prompt)
        # Verify all components are included
        self.assertIn("main", prompt["user"])
        self.assertIn("best", prompt["user"])
        self.assertIn("creative", prompt["user"])

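    # The next test pins down an assumed fitness rule (also relied on in
    # test_build_prompt_with_inspirations above): "combined_score" is reported
    # directly as the fitness value, and feature-dimension metrics such as
    # complexity and memory_usage are excluded from any averaging.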
    def test_fitness_calculation_consistency(self):
        """Test that fitness calculation is consistent across all methods"""
        metrics = {
            "combined_score": 0.8,
            "accuracy": 0.9,
            "speed": 0.7,
            "complexity": 5,  # Feature dimension
            "memory_usage": 100,  # Feature dimension
        }
        feature_dimensions = ["complexity", "memory_usage"]

        # Build a prompt with these metrics
        prompt = self.prompt_sampler.build_prompt(
            current_program="def test(): pass",
            program_metrics=metrics,
            inspirations=[
                {"id": "i1", "code": "pass", "metrics": metrics}
            ],
            feature_dimensions=feature_dimensions,
        )

        # The fitness score should be 0.8 (combined_score), not an average including features
        self.assertIn("0.8000", prompt["user"])  # Fitness score in prompt

    def test_empty_inspirations_list(self):
        """Test that an empty inspirations list doesn't break anything"""
        prompt = self.prompt_sampler.build_prompt(
            current_program="def empty(): pass",
            inspirations=[],  # Empty list
            feature_dimensions=["test_feature"],
        )

        self.assertIn("system", prompt)
        self.assertIn("user", prompt)
        # Should complete without errors

    def test_inspirations_with_missing_metrics(self):
        """Test handling of inspirations with missing or invalid metrics"""
        inspirations = [
            {
                "id": "bad1",
                "code": "def bad(): pass",
                "metrics": {},  # Empty metrics
            },
            {
                "id": "bad2",
                "code": "def worse(): pass",
                # No metrics key at all
            },
        ]

        # Should handle gracefully without errors
        result = self.prompt_sampler._format_inspirations_section(
            inspirations,
            "python",
            feature_dimensions=["test"],
        )

        self.assertIsInstance(result, str)

    def test_feature_dimensions_none_vs_empty_list(self):
        """Test that None and empty list for feature_dimensions are handled correctly"""
        program = {"metrics": {"score": 0.5}}

        # Test with None
        type_none = self.prompt_sampler._determine_program_type(program, None)

        # Test with empty list
        type_empty = self.prompt_sampler._determine_program_type(program, [])

        # Both should work and give the same result
        self.assertEqual(type_none, type_empty)

    def test_feature_coordinates_formatting_in_prompt(self):
        """Test that feature coordinates are formatted correctly in the prompt"""
        metrics = {
            "combined_score": 0.75,
            "complexity": 8,
            "memory_usage": 150,
            "cpu_usage": 0.3,
        }

        prompt = self.prompt_sampler.build_prompt(
            current_program="def test(): pass",
            program_metrics=metrics,
            feature_dimensions=["complexity", "memory_usage", "cpu_usage"],
        )

        # Check that feature coordinates are included
        user_msg = prompt["user"]
        self.assertIn("complexity", user_msg)
        self.assertIn("memory_usage", user_msg)
        self.assertIn("cpu_usage", user_msg)

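# A quick way to run just this module (the dotted path below is hypothetical;
# adjust it to wherever this file lives in the repository):
#   python -m unittest tests.test_prompt_sampler_comprehensive -v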

if __name__ == "__main__":
    unittest.main()