Skip to content

Commit a482ab1

Browse files
Fix success rate calculation to exclude skipped tests (#1136)
Co-authored-by: openhands <openhands@all-hands.dev>
1 parent a4f97bd commit a482ab1

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

tests/integration/schemas.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,11 @@ def from_eval_outputs(
8888
total_tests = len(test_instances)
8989
successful_tests = sum(1 for t in test_instances if t.test_result.success)
9090
skipped_tests = sum(1 for t in test_instances if t.test_result.skipped)
91-
success_rate = successful_tests / total_tests if total_tests > 0 else 0.0
91+
# Exclude skipped tests from success rate calculation
92+
non_skipped_tests = total_tests - skipped_tests
93+
success_rate = (
94+
successful_tests / non_skipped_tests if non_skipped_tests > 0 else 0.0
95+
)
9296
total_cost = sum(t.cost for t in test_instances)
9397

9498
return cls(
@@ -131,8 +135,13 @@ def from_model_results(
131135
# Calculate overall statistics
132136
total_tests_all = sum(r.total_tests for r in model_results)
133137
total_successful_all = sum(r.successful_tests for r in model_results)
138+
total_skipped_all = sum(r.skipped_tests for r in model_results)
139+
# Exclude skipped tests from overall success rate calculation
140+
non_skipped_tests_all = total_tests_all - total_skipped_all
134141
overall_success_rate = (
135-
total_successful_all / total_tests_all if total_tests_all > 0 else 0.0
142+
total_successful_all / non_skipped_tests_all
143+
if non_skipped_tests_all > 0
144+
else 0.0
136145
)
137146
total_cost_all_models = sum(r.total_cost for r in model_results)
138147

tests/integration/utils/generate_markdown_report.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ def generate_model_summary_table(model_results: list[ModelTestResults]) -> str:
2424

2525
for result in model_results:
2626
success_rate = f"{result.success_rate:.1%}"
27-
tests_passed = f"{result.successful_tests}/{result.total_tests}"
27+
non_skipped = result.total_tests - result.skipped_tests
28+
tests_passed = f"{result.successful_tests}/{non_skipped}"
2829
skipped = f"{result.skipped_tests}"
2930
cost = format_cost(result.total_cost)
3031

@@ -45,11 +46,12 @@ def generate_detailed_results(model_results: list[ModelTestResults]) -> str:
4546
sections = []
4647

4748
for result in model_results:
49+
non_skipped = result.total_tests - result.skipped_tests
4850
section_lines = [
4951
f"### {result.model_name}",
5052
"",
5153
f"- **Success Rate**: {result.success_rate:.1%} "
52-
f"({result.successful_tests}/{result.total_tests})",
54+
f"({result.successful_tests}/{non_skipped})",
5355
f"- **Total Cost**: {format_cost(result.total_cost)}",
5456
f"- **Run Suffix**: `{result.run_suffix}`",
5557
]

0 commit comments

Comments
 (0)