2 files changed: +15 -4 lines changed
lines changed Original file line number Diff line number Diff line change @@ -88,7 +88,11 @@ def from_eval_outputs(
8888 total_tests = len (test_instances )
8989 successful_tests = sum (1 for t in test_instances if t .test_result .success )
9090 skipped_tests = sum (1 for t in test_instances if t .test_result .skipped )
91- success_rate = successful_tests / total_tests if total_tests > 0 else 0.0
91+ # Exclude skipped tests from success rate calculation
92+ non_skipped_tests = total_tests - skipped_tests
93+ success_rate = (
94+ successful_tests / non_skipped_tests if non_skipped_tests > 0 else 0.0
95+ )
9296 total_cost = sum (t .cost for t in test_instances )
9397
9498 return cls (
@@ -131,8 +135,13 @@ def from_model_results(
131135 # Calculate overall statistics
132136 total_tests_all = sum (r .total_tests for r in model_results )
133137 total_successful_all = sum (r .successful_tests for r in model_results )
138+ total_skipped_all = sum (r .skipped_tests for r in model_results )
139+ # Exclude skipped tests from overall success rate calculation
140+ non_skipped_tests_all = total_tests_all - total_skipped_all
134141 overall_success_rate = (
135- total_successful_all / total_tests_all if total_tests_all > 0 else 0.0
142+ total_successful_all / non_skipped_tests_all
143+ if non_skipped_tests_all > 0
144+ else 0.0
136145 )
137146 total_cost_all_models = sum (r .total_cost for r in model_results )
138147
Original file line number Diff line number Diff line change @@ -24,7 +24,8 @@ def generate_model_summary_table(model_results: list[ModelTestResults]) -> str:
2424
2525 for result in model_results :
2626 success_rate = f"{ result .success_rate :.1%} "
27- tests_passed = f"{ result .successful_tests } /{ result .total_tests } "
27+ non_skipped = result .total_tests - result .skipped_tests
28+ tests_passed = f"{ result .successful_tests } /{ non_skipped } "
2829 skipped = f"{ result .skipped_tests } "
2930 cost = format_cost (result .total_cost )
3031
@@ -45,11 +46,12 @@ def generate_detailed_results(model_results: list[ModelTestResults]) -> str:
4546 sections = []
4647
4748 for result in model_results :
49+ non_skipped = result .total_tests - result .skipped_tests
4850 section_lines = [
4951 f"### { result .model_name } " ,
5052 "" ,
5153 f"- **Success Rate**: { result .success_rate :.1%} "
52- f"({ result .successful_tests } /{ result . total_tests } )" ,
54+ f"({ result .successful_tests } /{ non_skipped } )" ,
5355 f"- **Total Cost**: { format_cost (result .total_cost )} " ,
5456 f"- **Run Suffix**: `{ result .run_suffix } `" ,
5557 ]
You can’t perform that action at this time.
0 commit comments