From 48abb2a8ed12be3fb33593fed37d4e2746e9eca4 Mon Sep 17 00:00:00 2001
From: Fazeel Usmani <fazeel.usmani18@gmail.com>
Date: Fri, 7 Nov 2025 12:51:59 +0000
Subject: [PATCH 1/2] Fix session fixture teardown exceptions being reported as
 duplicate XFAILs

When a session-scoped autouse fixture raised an exception during teardown
and the last test in the suite was marked @pytest.mark.xfail, pytest
incorrectly showed an extra (duplicated) XFAIL line instead of reporting
the teardown failure as an ERROR.

The root cause was that the xfail handling in pytest_runtest_makereport was
applied to all phases (setup, call, teardown): for a test marked xfail, any
exception accepted by the marker's `raises` setting (or any exception at all
when `raises` is unset) was converted into an xfail result. As a result,
session fixture teardown exceptions were misreported as expected failures.

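The fix restricts xfail handling to the "call" phase only. Setup and
teardown failures are now reported as errors regardless of xfail markers
on the test, in line with the principle that xfail should apply only to
test execution, not to fixture setup/teardown failures.

Note that this is a behavior change: xfail-marked tests whose setup or
teardown fails were previously reported as xfailed and now produce an
error (and a non-zero exit status). The existing tests covering those
cases are updated accordingly.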

Fixes #8375
---
 src/_pytest/skipping.py    | 36 ++++++++++++++----------
 testing/python/fixtures.py |  3 +-
 testing/test_skipping.py   | 57 ++++++++++++++++++++++++++++++++++----
 3 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/src/_pytest/skipping.py b/src/_pytest/skipping.py
index 3b067629de0..432e5913766 100644
--- a/src/_pytest/skipping.py
+++ b/src/_pytest/skipping.py
@@ -287,21 +287,27 @@ def pytest_runtest_makereport(
         rep.outcome = "skipped"
     elif not rep.skipped and xfailed:
         if call.excinfo:
-            raises = xfailed.raises
-            if raises is None or (
-                (
-                    isinstance(raises, type | tuple)
-                    and isinstance(call.excinfo.value, raises)
-                )
-                or (
-                    isinstance(raises, AbstractRaises)
-                    and raises.matches(call.excinfo.value)
-                )
-            ):
-                rep.outcome = "skipped"
-                rep.wasxfail = xfailed.reason
-            else:
-                rep.outcome = "failed"
+            # Only apply xfail handling to the "call" phase.
+            # Setup and teardown failures should be reported as errors,
+            # not as expected failures, even if the test is marked xfail.
+            # This ensures that fixture teardown exceptions (e.g., from
+            # session-scoped fixtures) are properly reported as errors.
+            if call.when == "call":
+                raises = xfailed.raises
+                if raises is None or (
+                    (
+                        isinstance(raises, type | tuple)
+                        and isinstance(call.excinfo.value, raises)
+                    )
+                    or (
+                        isinstance(raises, AbstractRaises)
+                        and raises.matches(call.excinfo.value)
+                    )
+                ):
+                    rep.outcome = "skipped"
+                    rep.wasxfail = xfailed.reason
+                else:
+                    rep.outcome = "failed"
         elif call.when == "call":
             if xfailed.strict:
                 rep.outcome = "failed"
diff --git a/testing/python/fixtures.py b/testing/python/fixtures.py
index 6a65dce3c4d..033c12b1cb6 100644
--- a/testing/python/fixtures.py
+++ b/testing/python/fixtures.py
@@ -4914,7 +4914,8 @@ def test_crash_expected_setup_and_teardown() -> None:
         """
     )
     result = pytester.runpytest()
-    assert result.ret == 0
+    # Fixture setup failures are reported as errors, not xfails
+    assert result.ret == 1  # Errors from fixture setup failures
 
 
 def test_scoped_fixture_teardown_order(pytester: Pytester) -> None:
diff --git a/testing/test_skipping.py b/testing/test_skipping.py
index e1e25e45468..ad6264348cd 100644
--- a/testing/test_skipping.py
+++ b/testing/test_skipping.py
@@ -737,6 +737,11 @@ def test_2():
 
 class TestXFailwithSetupTeardown:
     def test_failing_setup_issue9(self, pytester: Pytester) -> None:
+        """Setup failures should be reported as errors, not xfails.
+
+        Even if a test is marked xfail, if the setup fails, that's an
+        infrastructure error, not an expected test failure.
+        """
         pytester.makepyfile(
             """
             import pytest
@@ -749,9 +754,14 @@ def test_func():
         """
         )
         result = pytester.runpytest()
-        result.stdout.fnmatch_lines(["*1 xfail*"])
+        result.stdout.fnmatch_lines(["*1 error*"])
 
     def test_failing_teardown_issue9(self, pytester: Pytester) -> None:
+        """Teardown failures should be reported as errors, not xfails.
+
+        Even if a test is marked xfail, if the teardown fails, that's an
+        infrastructure error, not an expected test failure.
+        """
         pytester.makepyfile(
             """
             import pytest
@@ -764,7 +774,7 @@ def test_func():
         """
         )
         result = pytester.runpytest()
-        result.stdout.fnmatch_lines(["*1 xfail*"])
+        result.stdout.fnmatch_lines(["*1 error*"])
 
 
 class TestSkip:
@@ -1185,6 +1195,11 @@ def test_default_markers(pytester: Pytester) -> None:
 
 
 def test_xfail_test_setup_exception(pytester: Pytester) -> None:
+    """Setup exceptions should be reported as errors, not xfails.
+
+    Even if a test is marked xfail, if setup fails (via pytest_runtest_setup hook),
+    that's an infrastructure error, not an expected test failure.
+    """
     pytester.makeconftest(
         """
             def pytest_runtest_setup():
@@ -1200,9 +1215,9 @@ def test_func():
         """
     )
     result = pytester.runpytest(p)
-    assert result.ret == 0
-    assert "xfailed" in result.stdout.str()
-    result.stdout.no_fnmatch_line("*xpassed*")
+    assert result.ret == 1  # Should fail due to error
+    assert "error" in result.stdout.str()
+    result.stdout.no_fnmatch_line("*xfailed*")
 
 
 def test_imperativeskip_on_xfail_test(pytester: Pytester) -> None:
@@ -1489,3 +1504,35 @@ def test_exit_reason_only():
     )
     result = pytester.runpytest(p)
     result.stdout.fnmatch_lines("*_pytest.outcomes.Exit: foo*")
+
+
+def test_session_fixture_teardown_exception_with_xfail(pytester: Pytester) -> None:
+    """Test that session fixture teardown exceptions are reported as errors,
+    not as duplicate xfails, even when the last test is marked xfail.
+
+    Regression test for issue #8375.
+    """
+    pytester.makepyfile(
+        """
+        import pytest
+
+        @pytest.fixture(autouse=True, scope='session')
+        def failme():
+            yield
+            raise RuntimeError('cleanup fails for some reason')
+
+        def test_ok():
+            assert True
+
+        @pytest.mark.xfail()
+        def test_expected_failure():
+            assert False
+        """
+    )
+    result = pytester.runpytest("-q")
+    result.stdout.fnmatch_lines([
+        "*1 passed, 1 xfailed, 1 error*",
+    ])
+    # Make sure we don't have duplicate xfails (would be "2 xfailed" before the fix)
+    assert "2 xfailed" not in result.stdout.str()
+    assert "1 xfailed" in result.stdout.str()

From ca20e5f8b697896820fa91930abb5e26f2d04aea Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 7 Nov 2025 15:06:22 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 testing/test_skipping.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/testing/test_skipping.py b/testing/test_skipping.py
index ad6264348cd..ef1ede390fc 100644
--- a/testing/test_skipping.py
+++ b/testing/test_skipping.py
@@ -1530,9 +1530,11 @@ def test_expected_failure():
         """
     )
     result = pytester.runpytest("-q")
-    result.stdout.fnmatch_lines([
-        "*1 passed, 1 xfailed, 1 error*",
-    ])
+    result.stdout.fnmatch_lines(
+        [
+            "*1 passed, 1 xfailed, 1 error*",
+        ]
+    )
     # Make sure we don't have duplicate xfails (would be "2 xfailed" before the fix)
     assert "2 xfailed" not in result.stdout.str()
     assert "1 xfailed" in result.stdout.str()