restore support for gray scale images

AntoineSimoulin · AntoineSimoulin · commit 0ed93675a44d · 2025-11-12T08:46:05.000-08:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -6743,6 +6743,27 @@ def test_functional_error(self):
 class TestToNVCVTensor:
     """Tests for to_nvcv_tensor function following patterns from TestToPil"""
 
+    def test_1_channel_uint8_tensor_to_nvcv_tensor(self):
+        img_data = torch.randint(0, 256, (1, 4, 4), dtype=torch.uint8, device="cuda")
+        nvcv_img = F.to_nvcv_tensor(img_data)
+        # Check that the conversion succeeded (the resulting format is not inspected here)
+        assert nvcv_img is not None
+
+    def test_1_channel_int16_tensor_to_nvcv_tensor(self):
+        img_data = torch.randint(0, 256, (1, 4, 4), dtype=torch.int16, device="cuda")
+        nvcv_img = F.to_nvcv_tensor(img_data)
+        assert nvcv_img is not None
+
+    def test_1_channel_int32_tensor_to_nvcv_tensor(self):
+        img_data = torch.randint(0, 256, (1, 4, 4), dtype=torch.int32, device="cuda")
+        nvcv_img = F.to_nvcv_tensor(img_data)
+        assert nvcv_img is not None
+
+    def test_1_channel_float32_tensor_to_nvcv_tensor(self):
+        img_data = torch.rand(1, 4, 4, device="cuda")
+        nvcv_img = F.to_nvcv_tensor(img_data)
+        assert nvcv_img is not None
+
     def test_3_channel_uint8_tensor_to_nvcv_tensor(self):
         img_data = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8, device="cuda")
         nvcv_img = F.to_nvcv_tensor(img_data)
@@ -6754,19 +6775,14 @@ def test_3_channel_float32_tensor_to_nvcv_tensor(self):
         assert nvcv_img is not None
 
     def test_unsupported_num_channels(self):
-        # Test 1-channel image (not supported)
-        img_data = torch.rand(1, 5, 5, device="cuda")
-        with pytest.raises(ValueError, match="Only 3-channel RGB images are supported"):
-            F.to_nvcv_tensor(img_data)
-
         # Test 2-channel image (not supported)
         img_data = torch.rand(2, 5, 5, device="cuda")
-        with pytest.raises(ValueError, match="Only 3-channel RGB images are supported"):
+        with pytest.raises(ValueError, match="Only 1 and 3 channel images are supported"):
             F.to_nvcv_tensor(img_data)
 
         # Test 4-channel image (not supported)
         img_data = torch.randint(0, 256, (4, 5, 5), dtype=torch.uint8, device="cuda")
-        with pytest.raises(ValueError, match="Only 3-channel RGB images are supported"):
+        with pytest.raises(ValueError, match="Only 1 and 3 channel images are supported"):
             F.to_nvcv_tensor(img_data)
 
     def test_invalid_input_type(self):
@@ -6786,19 +6802,30 @@ def test_invalid_dimensions(self):
         with pytest.raises(ValueError, match=r"pic should be 3 or 4 dimensional"):
             F.to_nvcv_tensor(torch.randint(0, 256, (1, 1, 3, 4, 4), dtype=torch.uint8, device="cuda"))
 
+    def test_float64_tensor_to_nvcv_tensor(self):
+        # Test single channel float64 (F64 format is supported)
+        img_data = torch.rand(1, 4, 4, dtype=torch.float64, device="cuda")
+        nvcv_img = F.to_nvcv_tensor(img_data)
+        assert nvcv_img is not None
+
     def test_float64_rgb_not_supported(self):
         # Test 3-channel float64 is NOT supported (no RGBf64 format in CV-CUDA)
         img_data = torch.rand(3, 4, 4, dtype=torch.float64, device="cuda")
         with pytest.raises(TypeError, match=r"Unsupported dtype"):
             F.to_nvcv_tensor(img_data)
 
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
-    def test_round_trip(self, dtype):
-        # Setup: Create a 3-channel tensor in CHW format (PyTorch standard)
+    @pytest.mark.parametrize("num_channels", [1, 3])
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32, torch.float64])
+    def test_round_trip(self, num_channels, dtype):
+        # Skip float64 for 3-channel (not supported by CV-CUDA)
+        if num_channels == 3 and dtype == torch.float64:
+            pytest.skip("float64 is not supported for 3-channel RGB images")
+
+        # Setup: Create a tensor in CHW format (PyTorch standard)
         if dtype == torch.uint8:
-            original_tensor = torch.randint(0, 256, (3, 4, 4), dtype=dtype, device="cuda")
+            original_tensor = torch.randint(0, 256, (num_channels, 4, 4), dtype=dtype, device="cuda")
         else:
-            original_tensor = torch.rand(3, 4, 4, dtype=dtype, device="cuda")
+            original_tensor = torch.rand(num_channels, 4, 4, dtype=dtype, device="cuda")
 
         # Execute: Convert to NVCV and back to tensor
         # CHW -> (to_nvcv_tensor) -> NVCV NHWC -> (nvcv_to_tensor) -> NCHW
@@ -6811,14 +6838,19 @@ def test_round_trip(self, dtype):
         # Assert: The round-trip conversion preserves the original tensor exactly
         torch.testing.assert_close(result_tensor, original_tensor, rtol=0, atol=0)
 
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
+    @pytest.mark.parametrize("num_channels", [1, 3])
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32, torch.float64])
     @pytest.mark.parametrize("batch_size", [1, 2, 4])
-    def test_round_trip_batched(self, dtype, batch_size):
-        # Setup: Create a batched 3-channel tensor in NCHW format
+    def test_round_trip_batched(self, num_channels, dtype, batch_size):
+        # Skip float64 for 3-channel (not supported by CV-CUDA)
+        if num_channels == 3 and dtype == torch.float64:
+            pytest.skip("float64 is not supported for 3-channel RGB images")
+
+        # Setup: Create a batched tensor in NCHW format
         if dtype == torch.uint8:
-            original_tensor = torch.randint(0, 256, (batch_size, 3, 4, 4), dtype=dtype, device="cuda")
+            original_tensor = torch.randint(0, 256, (batch_size, num_channels, 4, 4), dtype=dtype, device="cuda")
         else:
-            original_tensor = torch.rand(batch_size, 3, 4, 4, dtype=dtype, device="cuda")
+            original_tensor = torch.rand(batch_size, num_channels, 4, 4, dtype=dtype, device="cuda")
 
         # Execute: Convert to NVCV and back to tensor
         # NCHW -> (to_nvcv_tensor) -> NVCV NHWC -> (nvcv_to_tensor) -> NCHW
@@ -6834,7 +6866,7 @@ def test_round_trip_batched(self, dtype, batch_size):
 @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="test requires CUDA")
 class TestNVCVToTensor:
-    @pytest.mark.parametrize("color_space", ["RGB"])
+    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
     @pytest.mark.parametrize(
         "fn",
         [F.nvcv_to_tensor, transform_cls_to_functional(transforms.NVCVToTensor)],
diff --git a/torchvision/transforms/v2/functional/_type_conversion.py b/torchvision/transforms/v2/functional/_type_conversion.py
@@ -59,31 +59,46 @@ def _infer_nvcv_format(img_tensor: torch.Tensor):
 
     Args:
         img_tensor: Tensor with shape (H, W, C) where C is number of channels.
-                   Only 3-channel RGB images are supported.
 
     Returns:
-        tuple: (nvcv_format, processed_tensor)
+        tuple: (nvcv_format, processed_tensor) where processed_tensor is the input tensor,
+               returned unchanged for both single channel and 3 channel images.
 
     Raises:
-        TypeError: If dtype is not supported.
-        ValueError: If number of channels is not 3.
+        TypeError: If dtype is not supported for the given number of channels.
+        ValueError: If number of channels is not 1 or 3.
     """
     _, nvcv = _import_cvcuda_modules()
 
     num_channels = img_tensor.shape[2]
     dtype = img_tensor.dtype
 
-    # Validate number of channels upfront
-    if num_channels != 3:
-        raise ValueError(f"Only 3-channel RGB images are supported. Got {num_channels} channels.")
-
-    # Handle 3 channel RGB images
-    if dtype == torch.uint8:
-        return nvcv.Format.RGB8, img_tensor
-    elif dtype == torch.float32:
-        return nvcv.Format.RGBf32, img_tensor
-    else:
-        raise TypeError(f"Unsupported dtype {dtype} for RGB images. Only uint8 and float32 are supported.")
+    # Handle single channel images
+    if num_channels == 1:
+        if dtype == torch.uint8:
+            return nvcv.Format.U8, img_tensor
+        elif dtype == torch.int16:
+            return nvcv.Format.S16, img_tensor
+        elif dtype == torch.int32:
+            return nvcv.Format.S32, img_tensor
+        elif dtype == torch.float32:
+            return nvcv.Format.F32, img_tensor
+        elif dtype == torch.float64:
+            return nvcv.Format.F64, img_tensor
+        else:
+            raise TypeError(f"Unsupported dtype {dtype} for single channel image")
+
+    # Handle 3 channel images (defaults to RGB)
+    elif num_channels == 3:
+        if dtype == torch.uint8:
+            return nvcv.Format.RGB8, img_tensor
+        elif dtype == torch.float32:
+            return nvcv.Format.RGBf32, img_tensor
+        else:
+            # Note: CV-CUDA does not support float64 for RGB images (only F64 for single-channel)
+            raise TypeError(f"Unsupported dtype {dtype} for 3-channel image")
+
+    raise ValueError(f"Only 1 and 3 channel images are supported. Got {num_channels} channels.")
 
 
 @torch.jit.unused
@@ -95,7 +110,7 @@ def to_nvcv_tensor(pic) -> "nvcv.Tensor":
     Args:
         pic (torch.Tensor): Image to be converted to nvcv.Tensor.
             Tensor can be in CHW format (unbatched) or NCHW format (batched).
-            Only 3-channel RGB images are supported.
+            Only 1-channel and 3-channel images are supported.
 
     Returns:
         nvcv.Tensor: Image converted to nvcv.Tensor with NHWC layout.
@@ -122,11 +137,11 @@ def to_nvcv_tensor(pic) -> "nvcv.Tensor":
     # Convert NCHW -> NHWC
     img_tensor = img_tensor.permute(0, 2, 3, 1)
 
-    # Infer format from the first image - this validates we have 3 channels
+    # Validate channel count and dtype on the first image (the inferred format is discarded)
     sample_img = img_tensor[0]
     _infer_nvcv_format(sample_img)
 
-    # Convert to NVCV tensor with NHWC layout (always multi-channel RGB)
+    # Convert to NVCV tensor with NHWC layout
     return cvcuda.as_tensor(img_tensor.cuda().contiguous(), nvcv.TensorLayout.NHWC)