diff --git a/src/_pytest/skipping.py b/src/_pytest/skipping.py
index 3b067629de0..432e5913766 100644
--- a/src/_pytest/skipping.py
+++ b/src/_pytest/skipping.py
@@ -287,21 +287,27 @@ def pytest_runtest_makereport(
             rep.outcome = "skipped"
     elif not rep.skipped and xfailed:
         if call.excinfo:
-            raises = xfailed.raises
-            if raises is None or (
-                (
-                    isinstance(raises, type | tuple)
-                    and isinstance(call.excinfo.value, raises)
-                )
-                or (
-                    isinstance(raises, AbstractRaises)
-                    and raises.matches(call.excinfo.value)
-                )
-            ):
-                rep.outcome = "skipped"
-                rep.wasxfail = xfailed.reason
-            else:
-                rep.outcome = "failed"
+            # Only apply xfail handling to the "call" phase.
+            # Setup and teardown failures should be reported as errors,
+            # not as expected failures, even if the test is marked xfail.
+            # This ensures that fixture teardown exceptions (e.g., from
+            # session-scoped fixtures) are properly reported as errors.
+            if call.when == "call":
+                raises = xfailed.raises
+                if raises is None or (
+                    (
+                        isinstance(raises, type | tuple)
+                        and isinstance(call.excinfo.value, raises)
+                    )
+                    or (
+                        isinstance(raises, AbstractRaises)
+                        and raises.matches(call.excinfo.value)
+                    )
+                ):
+                    rep.outcome = "skipped"
+                    rep.wasxfail = xfailed.reason
+                else:
+                    rep.outcome = "failed"
         elif call.when == "call":
             if xfailed.strict:
                 rep.outcome = "failed"
diff --git a/testing/python/fixtures.py b/testing/python/fixtures.py
index 6a65dce3c4d..033c12b1cb6 100644
--- a/testing/python/fixtures.py
+++ b/testing/python/fixtures.py
@@ -4914,7 +4914,8 @@ def test_crash_expected_setup_and_teardown() -> None:
         """
     )
     result = pytester.runpytest()
-    assert result.ret == 0
+    # Fixture setup failures are reported as errors, not xfails
+    assert result.ret == 1  # Errors from fixture setup failures
 
 
 def test_scoped_fixture_teardown_order(pytester: Pytester) -> None:
diff --git a/testing/test_skipping.py b/testing/test_skipping.py
index e1e25e45468..ef1ede390fc 100644
--- a/testing/test_skipping.py
+++ b/testing/test_skipping.py
@@ -737,6 +737,11 @@ def test_2():
 
 class TestXFailwithSetupTeardown:
     def test_failing_setup_issue9(self, pytester: Pytester) -> None:
+        """Setup failures should be reported as errors, not xfails.
+
+        Even if a test is marked xfail, if the setup fails, that's an
+        infrastructure error, not an expected test failure.
+        """
         pytester.makepyfile(
             """
             import pytest
@@ -749,9 +754,14 @@ def test_func():
             """
         )
         result = pytester.runpytest()
-        result.stdout.fnmatch_lines(["*1 xfail*"])
+        result.stdout.fnmatch_lines(["*1 error*"])
 
     def test_failing_teardown_issue9(self, pytester: Pytester) -> None:
+        """Teardown failures should be reported as errors, not xfails.
+
+        Even if a test is marked xfail, if the teardown fails, that's an
+        infrastructure error, not an expected test failure.
+        """
         pytester.makepyfile(
             """
             import pytest
@@ -764,7 +774,7 @@ def test_func():
             """
         )
         result = pytester.runpytest()
-        result.stdout.fnmatch_lines(["*1 xfail*"])
+        result.stdout.fnmatch_lines(["*1 error*"])
 
 
 class TestSkip:
@@ -1185,6 +1195,11 @@ def test_default_markers(pytester: Pytester) -> None:
 
 
 def test_xfail_test_setup_exception(pytester: Pytester) -> None:
+    """Setup exceptions should be reported as errors, not xfails.
+
+    Even if a test is marked xfail, if setup fails (via pytest_runtest_setup hook),
+    that's an infrastructure error, not an expected test failure.
+    """
     pytester.makeconftest(
         """
            def pytest_runtest_setup():
@@ -1200,9 +1215,9 @@ def test_func():
         """
     )
     result = pytester.runpytest(p)
-    assert result.ret == 0
-    assert "xfailed" in result.stdout.str()
-    result.stdout.no_fnmatch_line("*xpassed*")
+    assert result.ret == 1  # Should fail due to error
+    assert "error" in result.stdout.str()
+    result.stdout.no_fnmatch_line("*xfailed*")
 
 
 def test_imperativeskip_on_xfail_test(pytester: Pytester) -> None:
@@ -1489,3 +1504,37 @@ def test_exit_reason_only():
     )
     result = pytester.runpytest(p)
     result.stdout.fnmatch_lines("*_pytest.outcomes.Exit: foo*")
+
+
+def test_session_fixture_teardown_exception_with_xfail(pytester: Pytester) -> None:
+    """Test that session fixture teardown exceptions are reported as errors,
+    not as duplicate xfails, even when the last test is marked xfail.
+
+    Regression test for issue #8375.
+    """
+    pytester.makepyfile(
+        """
+        import pytest
+
+        @pytest.fixture(autouse=True, scope='session')
+        def failme():
+            yield
+            raise RuntimeError('cleanup fails for some reason')
+
+        def test_ok():
+            assert True
+
+        @pytest.mark.xfail()
+        def test_expected_failure():
+            assert False
+        """
+    )
+    result = pytester.runpytest("-q")
+    result.stdout.fnmatch_lines(
+        [
+            "*1 passed, 1 xfailed, 1 error*",
+        ]
+    )
+    # Make sure we don't have duplicate xfails (would be "2 xfailed" before the fix)
+    assert "2 xfailed" not in result.stdout.str()
+    assert "1 xfailed" in result.stdout.str()