restrict support to 3-channel images

AntoineSimoulin · AntoineSimoulin · commit 6cb29af1c85e · 2025-11-12T06:56:53.000-08:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -6774,16 +6774,6 @@ def test_3_channel_float32_tensor_to_nvcv_tensor(self):
         nvcv_img = F.to_nvcv_tensor(img_data)
         assert nvcv_img is not None
 
-    def test_2d_uint8_tensor_to_nvcv_tensor(self):
-        img_data = torch.randint(0, 256, (4, 4), dtype=torch.uint8, device="cuda")
-        nvcv_img = F.to_nvcv_tensor(img_data)
-        assert nvcv_img is not None
-
-    def test_2d_float32_tensor_to_nvcv_tensor(self):
-        img_data = torch.rand(4, 4, device="cuda")
-        nvcv_img = F.to_nvcv_tensor(img_data)
-        assert nvcv_img is not None
-
     def test_unsupported_num_channels(self):
         # Test 2-channel image (CHW format: 2 channels x 5 height x 5 width)
         img_data = torch.rand(2, 5, 5, device="cuda")
@@ -6806,11 +6796,15 @@ def test_invalid_input_type(self):
 
     def test_invalid_dimensions(self):
         # Test 1D array (too few dimensions)
-        with pytest.raises(ValueError, match=r"pic should be 2/3/4 dimensional"):
+        with pytest.raises(ValueError, match=r"pic should be 3 or 4 dimensional"):
             F.to_nvcv_tensor(torch.randint(0, 256, (4,), dtype=torch.uint8, device="cuda"))
 
+        # Test 2D array (no longer supported)
+        with pytest.raises(ValueError, match=r"pic should be 3 or 4 dimensional"):
+            F.to_nvcv_tensor(torch.randint(0, 256, (4, 4), dtype=torch.uint8, device="cuda"))
+
         # Test 5D array (too many dimensions)
-        with pytest.raises(ValueError, match=r"pic should be 2/3/4 dimensional"):
+        with pytest.raises(ValueError, match=r"pic should be 3 or 4 dimensional"):
             F.to_nvcv_tensor(torch.randint(0, 256, (1, 1, 3, 4, 4), dtype=torch.uint8, device="cuda"))
 
     def test_unsupported_dtype_for_channels(self):
diff --git a/torchvision/transforms/v2/functional/_type_conversion.py b/torchvision/transforms/v2/functional/_type_conversion.py
@@ -100,51 +100,32 @@ def to_nvcv_tensor(pic) -> "nvcv.Tensor":
     if not isinstance(pic, torch.Tensor):
         raise TypeError(f"pic should be `torch.Tensor`. Got {type(pic)}.")
 
-    # Handle different tensor formats and track if input was batched (NCHW) or unbatched (CHW/HW)
-    if pic.ndim == 4:
-        # Batched tensor in NCHW format, permute to NHWC
-        img_tensor = pic.permute(0, 2, 3, 1)
-        input_was_batched = True
-    elif pic.ndim == 3:
-        # Unbatched tensor in CHW format, permute to HWC
-        img_tensor = pic.permute(1, 2, 0)
-        input_was_batched = False
-    else:
-        # 2D or other formats (unbatched single-channel)
+    # Validate dimensions - only support 3D (CHW) or 4D (NCHW)
+    if pic.ndim == 3:
+        # Add fake batch dimension to make it 4D
+        img_tensor = pic.unsqueeze(0)
+    elif pic.ndim == 4:
         img_tensor = pic
-        input_was_batched = False
-
-    # Ensure image has channel dimension for unbatched case
-    if img_tensor.ndim == 2:
-        img_tensor = img_tensor.unsqueeze(2)  # H W -> H W C
+    else:
+        raise ValueError(f"pic should be 3 or 4 dimensional. Got {pic.ndim} dimensions.")
 
-    # Validate dimensions
-    if img_tensor.ndim not in (3, 4):
-        raise ValueError(f"pic should be 2/3/4 dimensional. Got {img_tensor.ndim} dimensions.")
+    # At this point, img_tensor is always 4D in NCHW format
+    # Convert NCHW -> NHWC
+    img_tensor = img_tensor.permute(0, 2, 3, 1)
 
-    # For batched inputs, use the first image to infer format
-    sample_img = img_tensor[0] if img_tensor.ndim == 4 else img_tensor
+    # Infer format from the first image
+    sample_img = img_tensor[0]
     _, sample_img = _infer_nvcv_format(sample_img)
 
-    # If format inference modified the tensor (e.g., removed channel dimension for single channel)
+    # If format inference removed channel dimension (single channel case)
     # apply the same transformation to all images
-    if sample_img.ndim == 2 and img_tensor.ndim == 4:
+    if sample_img.ndim == 2:
         # Batched single channel case: remove channel dimension
         img_tensor = img_tensor.squeeze(-1)
-    elif sample_img.ndim == 2 and img_tensor.ndim == 3:
-        # Unbatched single channel case: replace with 2D tensor
-        img_tensor = sample_img
-
-    # Add batch dimension if not present (NVCV expects batched tensors)
-    if not input_was_batched:
-        img_tensor = img_tensor.unsqueeze(0)  # Add batch dimension at index 0
-
-    # Determine layout based on final tensor shape
-    # After all transformations, tensor is either NHW (single-channel) or NHWC (multi-channel)
-    if img_tensor.ndim == 3:
-        layout = nvcv.TensorLayout.NHW  # Batched single-channel
-    else:  # img_tensor.ndim == 4
-        layout = nvcv.TensorLayout.NHWC  # Batched multi-channel
+        layout = nvcv.TensorLayout.NHW
+    else:
+        # Batched multi-channel
+        layout = nvcv.TensorLayout.NHWC
 
     # Convert to NVCV tensor with the appropriate layout
     return cvcuda.as_tensor(img_tensor.cuda().contiguous(), layout)
@@ -156,10 +137,10 @@ def nvcv_to_tensor(nvcv_img: "nvcv.Tensor") -> torch.Tensor:
 
     Args:
         nvcv_img (nvcv.Tensor): nvcv.Tensor to be converted to PyTorch tensor.
-            Expected to be in NHWC or NHW layout (for batched images) or HWC or HW layout (for unbatched).
+            Expected to be in NHWC or NHW layout (batched images only).
 
     Returns:
-        torch.Tensor: Converted image in CHW format (unbatched) or NCHW format (batched).
+        torch.Tensor: Converted image in NCHW format (batched).
     """
     import nvcv  # type: ignore[import-not-found]
 
@@ -174,31 +155,17 @@ def nvcv_to_tensor(nvcv_img: "nvcv.Tensor") -> torch.Tensor:
     # NVCV tensors expose __cuda_array_interface__ which PyTorch can consume directly
     cuda_tensor = torch.as_tensor(nvcv_img.cuda(), device="cuda")
 
-    # Handle different dimensionalities
-    # NVCV stores images in NHWC (batched multi-channel), NHW (batched single-channel),
-    # HWC (unbatched multi-channel), or HW (unbatched single-channel) format
+    # Only support 4D (NHWC) or 3D (NHW) batched tensors
+    # NVCV stores images in NHWC (batched multi-channel) or NHW (batched single-channel) format
     if cuda_tensor.ndim == 4:
         # Batched multi-channel image in NHWC format
         # Convert NHWC -> NCHW
         img = cuda_tensor.permute(0, 3, 1, 2).contiguous()
     elif cuda_tensor.ndim == 3:
-        # Could be either:
-        # 1. Unbatched multi-channel (HWC) - last dim is 1 or 3
-        # 2. Batched single-channel (NHW) - last dim is width
-        # We distinguish by checking if last dimension is 1 or 3 (our supported channel counts)
-        if cuda_tensor.shape[2] in (1, 3):
-            # Unbatched multi-channel image in HWC format
-            # Convert HWC -> CHW
-            img = cuda_tensor.permute(2, 0, 1).contiguous()
-        else:
-            # Batched single-channel image in NHW format
-            # Convert NHW -> NCHW by adding channel dimension
-            img = cuda_tensor.unsqueeze(1).contiguous()
-    elif cuda_tensor.ndim == 2:
-        # Unbatched single-channel image in HW format
-        # Convert HW -> CHW by adding channel dimension
-        img = cuda_tensor.unsqueeze(0).contiguous()
+        # Batched single-channel image in NHW format
+        # Convert NHW -> NCHW by adding channel dimension
+        img = cuda_tensor.unsqueeze(1).contiguous()
     else:
-        raise ValueError(f"Image should be 2/3/4 dimensional. Got {cuda_tensor.ndim} dimensions.")
+        raise ValueError(f"Image should be 3 or 4 dimensional. Got {cuda_tensor.ndim} dimensions.")
 
     return img