Restrict _infer_nvcv_format and to_nvcv_tensor to 3-channel RGB images (drop single-channel support)

AntoineSimoulin · AntoineSimoulin · commit 54584dd60a07 · 2025-11-12T07:26:44.000-08:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -6743,27 +6743,6 @@ def test_functional_error(self):
 class TestToNVCVTensor:
     """Tests for to_nvcv_tensor function following patterns from TestToPil"""
 
-    def test_1_channel_uint8_tensor_to_nvcv_tensor(self):
-        img_data = torch.randint(0, 256, (1, 4, 4), dtype=torch.uint8, device="cuda")
-        nvcv_img = F.to_nvcv_tensor(img_data)
-        # Check that the conversion succeeded and format is correct
-        assert nvcv_img is not None
-
-    def test_1_channel_int16_tensor_to_nvcv_tensor(self):
-        img_data = torch.randint(0, 256, (1, 4, 4), dtype=torch.int16, device="cuda")
-        nvcv_img = F.to_nvcv_tensor(img_data)
-        assert nvcv_img is not None
-
-    def test_1_channel_int32_tensor_to_nvcv_tensor(self):
-        img_data = torch.randint(0, 256, (1, 4, 4), dtype=torch.int32, device="cuda")
-        nvcv_img = F.to_nvcv_tensor(img_data)
-        assert nvcv_img is not None
-
-    def test_1_channel_float32_tensor_to_nvcv_tensor(self):
-        img_data = torch.rand(1, 4, 4, device="cuda")
-        nvcv_img = F.to_nvcv_tensor(img_data)
-        assert nvcv_img is not None
-
     def test_3_channel_uint8_tensor_to_nvcv_tensor(self):
         img_data = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8, device="cuda")
         nvcv_img = F.to_nvcv_tensor(img_data)
@@ -6775,19 +6754,19 @@ def test_3_channel_float32_tensor_to_nvcv_tensor(self):
         assert nvcv_img is not None
 
     def test_unsupported_num_channels(self):
-        # Test 2-channel image (CHW format: 2 channels x 5 height x 5 width)
-        img_data = torch.rand(2, 5, 5, device="cuda")
-        with pytest.raises(ValueError, match="Only 1 and 3 channel images are supported"):
+        # Test 1-channel image (not supported)
+        img_data = torch.rand(1, 5, 5, device="cuda")
+        with pytest.raises(ValueError, match="Only 3-channel RGB images are supported"):
             F.to_nvcv_tensor(img_data)
 
-        # Test 4-channel image (CHW format: 4 channels x 5 height x 5 width)
-        img_data = torch.randint(0, 256, (4, 5, 5), dtype=torch.uint8, device="cuda")
-        with pytest.raises(ValueError, match="Only 1 and 3 channel images are supported"):
+        # Test 2-channel image (not supported)
+        img_data = torch.rand(2, 5, 5, device="cuda")
+        with pytest.raises(ValueError, match="Only 3-channel RGB images are supported"):
             F.to_nvcv_tensor(img_data)
 
-        # Test 5-channel image (CHW format: 5 channels x 5 height x 5 width)
-        img_data = torch.randint(0, 256, (5, 5, 5), dtype=torch.uint8, device="cuda")
-        with pytest.raises(ValueError, match="Only 1 and 3 channel images are supported"):
+        # Test 4-channel image (not supported)
+        img_data = torch.randint(0, 256, (4, 5, 5), dtype=torch.uint8, device="cuda")
+        with pytest.raises(ValueError, match="Only 3-channel RGB images are supported"):
             F.to_nvcv_tensor(img_data)
 
     def test_invalid_input_type(self):
@@ -6807,30 +6786,19 @@ def test_invalid_dimensions(self):
         with pytest.raises(ValueError, match=r"pic should be 3 or 4 dimensional"):
             F.to_nvcv_tensor(torch.randint(0, 256, (1, 1, 3, 4, 4), dtype=torch.uint8, device="cuda"))
 
-    def test_float64_tensor_to_nvcv_tensor(self):
-        # Test single channel float64 (F64 format is supported)
-        img_data = torch.rand(1, 4, 4, dtype=torch.float64, device="cuda")
-        nvcv_img = F.to_nvcv_tensor(img_data)
-        assert nvcv_img is not None
-
     def test_float64_rgb_not_supported(self):
         # Test 3-channel float64 is NOT supported (no RGBf64 format in CV-CUDA)
         img_data = torch.rand(3, 4, 4, dtype=torch.float64, device="cuda")
         with pytest.raises(TypeError, match=r"Unsupported dtype"):
             F.to_nvcv_tensor(img_data)
 
-    @pytest.mark.parametrize("num_channels", [1, 3])
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32, torch.float64])
-    def test_round_trip(self, num_channels, dtype):
-        # Skip float64 for 3-channel (not supported by CV-CUDA)
-        if num_channels == 3 and dtype == torch.float64:
-            pytest.skip("float64 is not supported for 3-channel RGB images")
-
-        # Setup: Create a tensor in CHW format (PyTorch standard)
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
+    def test_round_trip(self, dtype):
+        # Setup: Create a 3-channel tensor in CHW format (PyTorch standard)
         if dtype == torch.uint8:
-            original_tensor = torch.randint(0, 256, (num_channels, 4, 4), dtype=dtype, device="cuda")
+            original_tensor = torch.randint(0, 256, (3, 4, 4), dtype=dtype, device="cuda")
         else:
-            original_tensor = torch.rand(num_channels, 4, 4, dtype=dtype, device="cuda")
+            original_tensor = torch.rand(3, 4, 4, dtype=dtype, device="cuda")
 
         # Execute: Convert to NVCV and back to tensor
         # CHW -> (to_nvcv_tensor) -> NVCV NHWC -> (nvcv_to_tensor) -> CHW
@@ -6841,19 +6809,14 @@ def test_round_trip(self, num_channels, dtype):
         # Use allclose for robust comparison that handles floating-point precision
         assert torch.allclose(result_tensor, original_tensor, rtol=1e-5, atol=1e-7)
 
-    @pytest.mark.parametrize("num_channels", [1, 3])
-    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32, torch.float64])
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
     @pytest.mark.parametrize("batch_size", [1, 2, 4])
-    def test_round_trip_batched(self, num_channels, dtype, batch_size):
-        # Skip float64 for 3-channel (not supported by CV-CUDA)
-        if num_channels == 3 and dtype == torch.float64:
-            pytest.skip("float64 is not supported for 3-channel RGB images")
-
-        # Setup: Create a batched tensor in NCHW format
+    def test_round_trip_batched(self, dtype, batch_size):
+        # Setup: Create a batched 3-channel tensor in NCHW format
         if dtype == torch.uint8:
-            original_tensor = torch.randint(0, 256, (batch_size, num_channels, 4, 4), dtype=dtype, device="cuda")
+            original_tensor = torch.randint(0, 256, (batch_size, 3, 4, 4), dtype=dtype, device="cuda")
         else:
-            original_tensor = torch.rand(batch_size, num_channels, 4, 4, dtype=dtype, device="cuda")
+            original_tensor = torch.rand(batch_size, 3, 4, 4, dtype=dtype, device="cuda")
 
         # Execute: Convert to NVCV and back to tensor
         # NCHW -> (to_nvcv_tensor) -> NVCV NHWC -> (nvcv_to_tensor) -> NCHW
@@ -6870,7 +6833,7 @@ def test_round_trip_batched(self, num_channels, dtype, batch_size):
 @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
 @pytest.mark.skipif(not CUDA_AVAILABLE, reason="test requires CUDA")
 class TestNVCVToTensor:
-    @pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
+    @pytest.mark.parametrize("color_space", ["RGB"])
     @pytest.mark.parametrize(
         "fn",
         [F.nvcv_to_tensor, transform_cls_to_functional(transforms.NVCVToTensor)],
diff --git a/torchvision/transforms/v2/functional/_type_conversion.py b/torchvision/transforms/v2/functional/_type_conversion.py
@@ -36,47 +36,31 @@ def _infer_nvcv_format(img_tensor: torch.Tensor):
 
     Args:
         img_tensor: Tensor with shape (H, W, C) where C is number of channels.
+                   Only 3-channel RGB images are supported.
 
     Returns:
-        tuple: (nvcv_format, processed_tensor) where processed_tensor may have reduced dimensions
-               for single channel images.
+        tuple: (nvcv_format, img_tensor), where img_tensor is the input returned unchanged.
 
     Raises:
-        TypeError: If dtype is not supported for the given number of channels.
-        ValueError: If number of channels is not 1 or 3.
+        TypeError: If dtype is not supported.
+        ValueError: If number of channels is not 3.
     """
     import nvcv  # type: ignore[import-not-found]
 
     num_channels = img_tensor.shape[2]
     dtype = img_tensor.dtype
 
-    # Handle single channel images
-    if num_channels == 1:
-        img_tensor = img_tensor[:, :, 0]
-        if dtype == torch.uint8:
-            return nvcv.Format.U8, img_tensor
-        elif dtype == torch.int16:
-            return nvcv.Format.S16, img_tensor
-        elif dtype == torch.int32:
-            return nvcv.Format.S32, img_tensor
-        elif dtype == torch.float32:
-            return nvcv.Format.F32, img_tensor
-        elif dtype == torch.float64:
-            return nvcv.Format.F64, img_tensor
-        else:
-            raise TypeError(f"Unsupported dtype {dtype} for single channel image")
-
-    # Handle 3 channel images (defaults to RGB)
-    elif num_channels == 3:
-        if dtype == torch.uint8:
-            return nvcv.Format.RGB8, img_tensor
-        elif dtype == torch.float32:
-            return nvcv.Format.RGBf32, img_tensor
-        else:
-            # Note: CV-CUDA does not support float64 for RGB images (only F64 for single-channel)
-            raise TypeError(f"Unsupported dtype {dtype} for 3-channel image")
-
-    raise ValueError(f"Only 1 and 3 channel images are supported. Got {num_channels} channels.")
+    # Validate number of channels upfront
+    if num_channels != 3:
+        raise ValueError(f"Only 3-channel RGB images are supported. Got {num_channels} channels.")
+
+    # Handle 3 channel RGB images
+    if dtype == torch.uint8:
+        return nvcv.Format.RGB8, img_tensor
+    elif dtype == torch.float32:
+        return nvcv.Format.RGBf32, img_tensor
+    else:
+        raise TypeError(f"Unsupported dtype {dtype} for RGB images. Only uint8 and float32 are supported.")
 
 
 @torch.jit.unused
@@ -88,7 +72,7 @@ def to_nvcv_tensor(pic) -> "nvcv.Tensor":
     Args:
         pic (torch.Tensor): Image to be converted to nvcv.Tensor.
             Tensor can be in CHW format (unbatched) or NCHW format (batched).
-            Only 1-channel and 3-channel images are supported.
+            Only 3-channel RGB images are supported.
 
     Returns:
         nvcv.Tensor: Image converted to nvcv.Tensor with NHWC layout.
@@ -116,22 +100,12 @@ def to_nvcv_tensor(pic) -> "nvcv.Tensor":
     # Convert NCHW -> NHWC
     img_tensor = img_tensor.permute(0, 2, 3, 1)
 
-    # Infer format from the first image
+    # Validate the first image: raises unless it has 3 channels and a supported dtype (format itself is unused)
     sample_img = img_tensor[0]
-    _, sample_img = _infer_nvcv_format(sample_img)
-
-    # If format inference removed channel dimension (single channel case)
-    # apply the same transformation to all images
-    if sample_img.ndim == 2:
-        # Batched single channel case: remove channel dimension
-        img_tensor = img_tensor.squeeze(-1)
-        layout = nvcv.TensorLayout.NHW
-    else:
-        # Batched multi-channel
-        layout = nvcv.TensorLayout.NHWC
+    _infer_nvcv_format(sample_img)
 
-    # Convert to NVCV tensor with the appropriate layout
-    return cvcuda.as_tensor(img_tensor.cuda().contiguous(), layout)
+    # Convert to NVCV tensor with NHWC layout (always multi-channel RGB)
+    return cvcuda.as_tensor(img_tensor.cuda().contiguous(), nvcv.TensorLayout.NHWC)
 
 
 @torch.jit.unused