11import sys
2+ from dataclasses import dataclass
3+ from enum import Enum
24
35from codeflash .cli_cmds .console import logger
46from codeflash .models .models import TestResults , TestType , VerificationType
79INCREASED_RECURSION_LIMIT = 5000
810
911
10- def compare_test_results (original_results : TestResults , candidate_results : TestResults ) -> bool :
class TestDiffScope(Enum):
    """Aspect of a test invocation in which two test runs can differ.

    Each member's value is the lowercase string form of its name, so a member
    can be looked up from its string value, e.g. ``TestDiffScope("stdout")``.
    """

    RETURN_VALUE = "return_value"
    STDOUT = "stdout"
    TIMED_OUT = "timed_out"
    DID_PASS = "did_pass"  # noqa: S105
17+
18+
@dataclass
class TestDiff:
    """One recorded difference between an original and a candidate test result.

    Field order is part of the generated ``__init__`` signature and is kept
    as: scope, test_src_code, pytest_error, original_value, candidate_value.
    """

    # Which aspect of the result differed (return value, stdout, ...).
    scope: TestDiffScope
    # Source code of the test that produced the differing result.
    test_src_code: str
    # NOTE(review): compare_test_results fills this from a `.get(...)` lookup,
    # so it is presumably None when no failure is recorded -- confirm and widen
    # the annotation if so.
    pytest_error: str
    # Annotated as `object` because the previous annotation, lowercase `any`,
    # is the builtin function rather than a type; the values are arbitrary.
    original_value: object
    candidate_value: object
26+
27+
# compare_test_results: compare the test results of an original run against a candidate run.
#
# Returns a (matched, diffs) tuple:
#   * (False, []) when either result set is empty, or when the loop reaches the
#     `original_test_result is None or cdd_test_result is None` check;
#   * (False, test_diffs) when did_all_timeout is still True after the loop, i.e. no
#     processed original result had a falsy `timed_out`;
#   * otherwise (len(test_diffs) == 0, test_diffs).
# Each TestDiff appended here carries the mismatch scope (RETURN_VALUE, STDOUT or DID_PASS),
# the test source code, the original and candidate values, and the candidate's pytest error lookup.
#
# NOTE(review): this view is a diff excerpt -- rows marked "N-" are the removed version,
# rows marked "N+" the added version, and "@@" rows stand for elided code, so parts of the
# loop body are not visible here.
28+ def compare_test_results (original_results : TestResults , candidate_results : TestResults ) -> tuple [bool , list [TestDiff ]]:
1129 # This is meant to be only called with test results for the first loop index
1230 if len (original_results ) == 0 or len (candidate_results ) == 0 :
13- return False # empty test results are not equal
31+ return False , [] # empty test results are not equal
# Remember the current recursion limit so it can be restored after the comparison loop.
1432 original_recursion_limit = sys .getrecursionlimit ()
1533 if original_recursion_limit < INCREASED_RECURSION_LIMIT :
1634 sys .setrecursionlimit (INCREASED_RECURSION_LIMIT ) # Increase recursion limit to avoid RecursionError
# Iterate over every invocation id seen in either the original or the candidate results.
1735 test_ids_superset = original_results .get_all_unique_invocation_loop_ids ().union (
1836 set (candidate_results .get_all_unique_invocation_loop_ids ())
1937 )
20- are_equal : bool = True
38+ test_diffs : list [ TestDiff ] = []
2139 did_all_timeout : bool = True
2240 for test_id in test_ids_superset :
2341 original_test_result = original_results .get_by_unique_invocation_loop_id (test_id )
2442 cdd_test_result = candidate_results .get_by_unique_invocation_loop_id (test_id )
# NOTE(review): original_test_result is dereferenced on the next line before the `is None`
# checks that follow; if the original lookup returned None this would raise AttributeError
# instead of reaching the `continue` / early return -- confirm and consider moving this
# lookup below the None guards.
43+ candidate_pytest_error = candidate_results .test_failures .get (original_test_result .id .test_function_name )
2544 if cdd_test_result is not None and original_test_result is None :
2645 continue
2746 # If helper function instance_state verification is not present, that's ok. continue
@@ -32,8 +51,7 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
3251 ):
3352 continue
3453 if original_test_result is None or cdd_test_result is None :
35- are_equal = False
36- break
# NOTE(review): unlike the removed `break`, this early return skips the
# sys.setrecursionlimit(original_recursion_limit) restore at the end of the function,
# so a raised limit stays in effect for the caller -- confirm this is acceptable.
54+ return False , []
# did_all_timeout stays True only while every processed original result has timed_out set.
3755 did_all_timeout = did_all_timeout and original_test_result .timed_out
3856 if original_test_result .timed_out :
3957 continue
@@ -43,31 +61,42 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
4361 in {VerificationType .INIT_STATE_HELPER , VerificationType .INIT_STATE_FTO }
4462 ):
4563 superset_obj = True
64+ test_src_code = original_test_result .id .get_src_code (original_test_result .file_name )
# A return-value mismatch is recorded as a RETURN_VALUE diff; no `break` is visible in this
# branch, so the loop appears to keep going after it (indentation is lost in this view).
4665 if not comparator (original_test_result .return_value , cdd_test_result .return_value , superset_obj = superset_obj ):
47- are_equal = False
66+ test_diffs .append (
67+ TestDiff (
68+ scope = TestDiffScope .RETURN_VALUE ,
69+ test_src_code = test_src_code ,
70+ original_value = original_test_result .return_value ,
71+ candidate_value = cdd_test_result .return_value ,
72+ pytest_error = candidate_pytest_error ,
73+ )
74+ )
75+
4876 try :
49- logger .debug (
50- "File Name: %s\n "
51- "Test Type: %s\n "
52- "Verification Type: %s\n "
53- "Invocation ID: %s\n "
54- "Original return value: %s\n "
55- "Candidate return value: %s\n "
56- "-------------------" ,
57- original_test_result .file_name ,
58- original_test_result .test_type ,
59- original_test_result .verification_type ,
60- original_test_result .id ,
61- original_test_result .return_value ,
62- cdd_test_result .return_value ,
# NOTE(review): the added version writes these details with print() to stdout, where the
# removed version used logger.debug -- confirm this is intentional and not leftover debugging.
77+ print (
78+ f"File Name: { original_test_result .file_name } \n "
79+ f"Test Type: { original_test_result .test_type } \n "
80+ f"Verification Type: { original_test_result .verification_type } \n "
81+ f"Invocation ID: { original_test_result .id } \n "
82+ f"Original return value: { original_test_result .return_value } \n "
83+ f"Candidate return value: { cdd_test_result .return_value } \n "
6384 )
# Any exception raised while formatting or printing the values is logged, and the comparison loop ends.
6485 except Exception as e :
6586 logger .error (e )
6687 break
# stdout is compared only when both results have a truthy stdout; a mismatch is recorded
# as a STDOUT diff and the loop stops.
6788 if (original_test_result .stdout and cdd_test_result .stdout ) and not comparator (
6889 original_test_result .stdout , cdd_test_result .stdout
6990 ):
70- are_equal = False
91+ test_diffs .append (
92+ TestDiff (
93+ scope = TestDiffScope .STDOUT ,
94+ test_src_code = test_src_code ,
95+ original_value = original_test_result .stdout ,
96+ candidate_value = cdd_test_result .stdout ,
97+ pytest_error = candidate_pytest_error ,
98+ )
99+ )
71100 break
72101
# For the listed test types, a differing did_pass flag is recorded as a DID_PASS diff
# and the loop stops.
73102 if original_test_result .test_type in {
@@ -76,9 +105,17 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
76105 TestType .GENERATED_REGRESSION ,
77106 TestType .REPLAY_TEST ,
78107 } and (cdd_test_result .did_pass != original_test_result .did_pass ):
79- are_equal = False
108+ test_diffs .append (
109+ TestDiff (
110+ scope = TestDiffScope .DID_PASS ,
111+ test_src_code = test_src_code ,
112+ original_value = original_test_result .did_pass ,
113+ candidate_value = cdd_test_result .did_pass ,
114+ pytest_error = candidate_pytest_error ,
115+ )
116+ )
80117 break
# Restore the recursion limit captured before the loop.
81118 sys .setrecursionlimit (original_recursion_limit )
82119 if did_all_timeout :
83- return False
84- return are_equal
120+ return False , test_diffs
# The results match only when no diff was recorded.
121+ return len ( test_diffs ) == 0 , test_diffs
0 commit comments