From df7ebb6ba9bd35ce890ede0ff7e0b9d2c555e124 Mon Sep 17 00:00:00 2001
From: "Deng, Daisy"
Date: Thu, 16 Oct 2025 08:41:55 +0000
Subject: [PATCH 1/2] port CUDA-specific test case with hook

---
 test/xpu/test_nn_xpu.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/test/xpu/test_nn_xpu.py b/test/xpu/test_nn_xpu.py
index 4ff4bcef2..abdecc20e 100644
--- a/test/xpu/test_nn_xpu.py
+++ b/test/xpu/test_nn_xpu.py
@@ -16,6 +16,7 @@
 from torch.testing._internal.common_device_type import (
     dtypes,
     instantiate_device_type_tests,
+    largeTensorTest,
 )
 from torch.testing._internal.common_dtype import get_all_math_dtypes, integral_types
 from torch.testing._internal.common_utils import (
@@ -3786,6 +3787,39 @@ def test_cross_entropy_loss_2d_out_of_bounds_class_index(self):
     )
 
 
+@dtypes(torch.float, torch.half)
+@largeTensorTest("20GB")
+@largeTensorTest("64GB", "cpu")
+def _test_warp_softmax_64bit_indexing(self, device, dtype):
+    def run_test(*shape):
+        x = torch.randn(shape, device="xpu", dtype=torch.float16, requires_grad=True)
+        y = F.log_softmax(x, dim=-1, dtype=dtype)
+        y.backward(y)
+        with torch.no_grad():
+            xx = x.cpu().requires_grad_()
+        yy = F.log_softmax(xx.float(), dim=-1).to(dtype)
+        yy.backward(yy)
+        # workaround to reduce memory usage vs. self.assertEqual, see #84944
+        rtol, atol = torch.testing._comparison.get_tolerances(
+            dtype, rtol=None, atol=None
+        )
+        self.assertTrue(torch.allclose(y.cpu(), yy, rtol=rtol, atol=atol))
+        # x is half
+        rtol, _ = torch.testing._comparison.get_tolerances(
+            torch.half, rtol=None, atol=None
+        )
+        self.assertTrue(torch.allclose(x.grad.cpu(), xx.grad, rtol=rtol, atol=1e-3))
+
+    run_test(
+        1100000000, 2
+    )  # Illegal memory access https://github.com/pytorch/pytorch/issues/52715
+    run_test(
+        2200000000, 1
+    )  # invalid configuration argument https://github.com/pytorch/pytorch/issues/52716
+
+
+TestNNDeviceType.test_warp_softmax_64bit_indexing = _test_warp_softmax_64bit_indexing
+
 TestNNDeviceType.test_cross_entropy_loss_2d_out_of_bounds_class_index = (
     _test_cross_entropy_loss_2d_out_of_bounds_class_index
 )

From 6ab64469aeae61de1fbe84ef8e901065af6fcbb3 Mon Sep 17 00:00:00 2001
From: "Deng, Daisy"
Date: Fri, 17 Oct 2025 03:20:02 +0000
Subject: [PATCH 2/2] replace hardcoded "xpu" with device variable

---
 test/xpu/test_nn_xpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/xpu/test_nn_xpu.py b/test/xpu/test_nn_xpu.py
index abdecc20e..72d2647af 100644
--- a/test/xpu/test_nn_xpu.py
+++ b/test/xpu/test_nn_xpu.py
@@ -3792,7 +3792,7 @@ def test_cross_entropy_loss_2d_out_of_bounds_class_index(self):
 @largeTensorTest("64GB", "cpu")
 def _test_warp_softmax_64bit_indexing(self, device, dtype):
     def run_test(*shape):
-        x = torch.randn(shape, device="xpu", dtype=torch.float16, requires_grad=True)
+        x = torch.randn(shape, device=device, dtype=torch.float16, requires_grad=True)
         y = F.log_softmax(x, dim=-1, dtype=dtype)
         y.backward(y)
         with torch.no_grad():
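
Note on the hook mechanism used above: rather than copying the whole CUDA test class, the port defines the test body as a free function and assigns it onto TestNNDeviceType before instantiate_device_type_tests() generates the per-device classes, so the generated test receives the concrete `device` string, which is why PATCH 2/2 drops the hardcoded "xpu". The following is a minimal self-contained sketch of that pattern, not the actual test_nn_xpu.py wiring; the class and test names are illustrative, and the only_for/allow_xpu arguments assume a recent PyTorch built with XPU support.

    import torch
    from torch.testing._internal.common_device_type import (
        dtypes,
        instantiate_device_type_tests,
    )
    from torch.testing._internal.common_utils import TestCase, run_tests

    class ExampleDeviceType(TestCase):
        # generic device-type test class; per-device subclasses are generated below
        pass

    @dtypes(torch.float, torch.half)
    def _test_ones_sum(self, device, dtype):
        # `device` arrives as the concrete device string ("xpu", "cpu", ...)
        x = torch.ones(8, device=device, dtype=dtype)
        self.assertEqual(x.sum().item(), 8.0)

    # the hook: attach the body before instantiation so the generator picks it up
    ExampleDeviceType.test_ones_sum = _test_ones_sum

    # generates e.g. ExampleDeviceTypeXPU with device/dtype variants
    instantiate_device_type_tests(
        ExampleDeviceType, globals(), only_for="xpu", allow_xpu=True
    )

    if __name__ == "__main__":
        run_tests()

The same parametrization motivates the follow-up fix: once instantiate_device_type_tests supplies `device`, hardcoding "xpu" inside the body defeats the hook and would break if the function were reused for another device type, so the variable is the correct spelling.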