diff --git a/src/_pytest/skipping.py b/src/_pytest/skipping.py
index 3b067629de0..432e5913766 100644
--- a/src/_pytest/skipping.py
+++ b/src/_pytest/skipping.py
@@ -287,21 +287,27 @@ def pytest_runtest_makereport(
             rep.outcome = "skipped"
     elif not rep.skipped and xfailed:
         if call.excinfo:
-            raises = xfailed.raises
-            if raises is None or (
-                (
-                    isinstance(raises, type | tuple)
-                    and isinstance(call.excinfo.value, raises)
-                )
-                or (
-                    isinstance(raises, AbstractRaises)
-                    and raises.matches(call.excinfo.value)
-                )
-            ):
-                rep.outcome = "skipped"
-                rep.wasxfail = xfailed.reason
-            else:
-                rep.outcome = "failed"
+            # Only apply xfail handling to the "call" phase.
+            # Setup and teardown failures should be reported as errors,
+            # not as expected failures, even if the test is marked xfail.
+            # This ensures that fixture teardown exceptions (e.g., from
+            # session-scoped fixtures) are properly reported as errors.
+            if call.when == "call":
+                raises = xfailed.raises
+                if raises is None or (
+                    (
+                        isinstance(raises, type | tuple)
+                        and isinstance(call.excinfo.value, raises)
+                    )
+                    or (
+                        isinstance(raises, AbstractRaises)
+                        and raises.matches(call.excinfo.value)
+                    )
+                ):
+                    rep.outcome = "skipped"
+                    rep.wasxfail = xfailed.reason
+                else:
+                    rep.outcome = "failed"
         elif call.when == "call":
             if xfailed.strict:
                 rep.outcome = "failed"
diff --git a/testing/python/fixtures.py b/testing/python/fixtures.py
index 6a65dce3c4d..033c12b1cb6 100644
--- a/testing/python/fixtures.py
+++ b/testing/python/fixtures.py
@@ -4914,7 +4914,8 @@ def test_crash_expected_setup_and_teardown() -> None:
         """
     )
     result = pytester.runpytest()
-    assert result.ret == 0
+    # Fixture setup failures are reported as errors, not xfails
+    assert result.ret == 1  # Errors from fixture setup failures
 
 
 def test_scoped_fixture_teardown_order(pytester: Pytester) -> None:
diff --git a/testing/test_skipping.py b/testing/test_skipping.py
index e1e25e45468..ef1ede390fc 100644
--- a/testing/test_skipping.py
+++ b/testing/test_skipping.py
@@ -737,6 +737,11 @@ def test_2():
 
 class TestXFailwithSetupTeardown:
     def test_failing_setup_issue9(self, pytester: Pytester) -> None:
+        """Setup failures should be reported as errors, not xfails.
+
+        Even if a test is marked xfail, if the setup fails, that's an
+        infrastructure error, not an expected test failure.
+        """
         pytester.makepyfile(
             """
             import pytest
@@ -749,9 +754,14 @@ def test_func():
             """
         )
         result = pytester.runpytest()
-        result.stdout.fnmatch_lines(["*1 xfail*"])
+        result.stdout.fnmatch_lines(["*1 error*"])
 
     def test_failing_teardown_issue9(self, pytester: Pytester) -> None:
+        """Teardown failures should be reported as errors, not xfails.
+
+        Even if a test is marked xfail, if the teardown fails, that's an
+        infrastructure error, not an expected test failure.
+        """
         pytester.makepyfile(
             """
             import pytest
@@ -764,7 +774,7 @@ def test_func():
             """
         )
         result = pytester.runpytest()
-        result.stdout.fnmatch_lines(["*1 xfail*"])
+        result.stdout.fnmatch_lines(["*1 error*"])
 
 
 class TestSkip:
@@ -1185,6 +1195,11 @@ def test_default_markers(pytester: Pytester) -> None:
 
 
 def test_xfail_test_setup_exception(pytester: Pytester) -> None:
+    """Setup exceptions should be reported as errors, not xfails.
+
+    Even if a test is marked xfail, if setup fails (via pytest_runtest_setup hook),
+    that's an infrastructure error, not an expected test failure.
+    """
     pytester.makeconftest(
         """
            def pytest_runtest_setup():
@@ -1200,9 +1215,9 @@ def test_func():
         """
     )
     result = pytester.runpytest(p)
-    assert result.ret == 0
-    assert "xfailed" in result.stdout.str()
-    result.stdout.no_fnmatch_line("*xpassed*")
+    assert result.ret == 1  # Should fail due to error
+    assert "error" in result.stdout.str()
+    result.stdout.no_fnmatch_line("*xfailed*")
 
 
 def test_imperativeskip_on_xfail_test(pytester: Pytester) -> None:
@@ -1489,3 +1504,37 @@ def test_exit_reason_only():
     )
     result = pytester.runpytest(p)
     result.stdout.fnmatch_lines("*_pytest.outcomes.Exit: foo*")
+
+
+def test_session_fixture_teardown_exception_with_xfail(pytester: Pytester) -> None:
+    """Test that session fixture teardown exceptions are reported as errors,
+    not as duplicate xfails, even when the last test is marked xfail.
+
+    Regression test for issue #8375.
+    """
+    pytester.makepyfile(
+        """
+        import pytest
+
+        @pytest.fixture(autouse=True, scope='session')
+        def failme():
+            yield
+            raise RuntimeError('cleanup fails for some reason')
+
+        def test_ok():
+            assert True
+
+        @pytest.mark.xfail()
+        def test_expected_failure():
+            assert False
+        """
+    )
+    result = pytester.runpytest("-q")
+    result.stdout.fnmatch_lines(
+        [
+            "*1 passed, 1 xfailed, 1 error*",
+        ]
+    )
+    # Make sure we don't have duplicate xfails (would be "2 xfailed" before the fix)
+    assert "2 xfailed" not in result.stdout.str()
+    assert "1 xfailed" in result.stdout.str()