Remove device_variant plumbing from the video encoder ops; parametrize encoder tests with the CUDA device

Dan-Flores · Dan-Flores · commit a0f594eda8ab · 2025-10-29T20:40:39.000Z
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str device=\"cpu\", str device_variant=\"ffmpeg\", int? crf=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str device=\"cpu\", int? crf=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str device=\"cpu\", str device_variant=\"ffmpeg\", int? crf=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str device=\"cpu\", int? crf=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str device=\"cpu\", str device_variant=\"ffmpeg\",int? crf=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str device=\"cpu\",int? crf=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -604,14 +604,12 @@ void encode_video_to_file(
     int64_t frame_rate,
     std::string_view file_name,
     std::string_view device = "cpu",
-    std::string_view device_variant = "ffmpeg",
     std::optional<int64_t> crf = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.crf = crf;
 
-  validateDeviceInterface(std::string(device), std::string(device_variant));
   videoStreamOptions.device = torch::Device(std::string(device));
-  videoStreamOptions.deviceVariant = device_variant;
+  videoStreamOptions.deviceVariant = "ffmpeg";
   VideoEncoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
@@ -625,15 +623,13 @@ at::Tensor encode_video_to_tensor(
     int64_t frame_rate,
     std::string_view format,
     std::string_view device = "cpu",
-    std::string_view device_variant = "ffmpeg",
     std::optional<int64_t> crf = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.crf = crf;
 
-  validateDeviceInterface(std::string(device), std::string(device_variant));
   videoStreamOptions.device = torch::Device(std::string(device));
-  videoStreamOptions.deviceVariant = device_variant;
+  videoStreamOptions.deviceVariant = "ffmpeg";
   return VideoEncoder(
              frames,
              validateInt64ToInt(frame_rate, "frame_rate"),
@@ -649,7 +645,6 @@ void _encode_video_to_file_like(
     std::string_view format,
     int64_t file_like_context,
     std::string_view device = "cpu",
-    std::string_view device_variant = "ffmpeg",
     std::optional<int64_t> crf = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
@@ -660,9 +655,8 @@ void _encode_video_to_file_like(
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.crf = crf;
 
-  validateDeviceInterface(std::string(device), std::string(device_variant));
   videoStreamOptions.device = torch::Device(std::string(device));
-  videoStreamOptions.deviceVariant = device_variant;
+  videoStreamOptions.deviceVariant = "ffmpeg";
 
   VideoEncoder encoder(
       frames,
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -213,7 +213,6 @@ def encode_video_to_file_like(
     format: str,
     file_like: Union[io.RawIOBase, io.BufferedIOBase],
     device: str = "cpu",
-    device_variant: str = "ffmpeg",
     crf: Optional[int] = None,
 ) -> None:
     """Encode video frames to a file-like object.
@@ -224,7 +223,6 @@ def encode_video_to_file_like(
         format: Video format (e.g., "mp4", "mov", "mkv")
         file_like: File-like object that supports write() and seek() methods
         device: Device to use for encoding (default: "cpu")
-        device_variant:
         crf: Optional constant rate factor for encoding quality
     """
     assert _pybind_ops is not None
@@ -235,7 +233,6 @@ def encode_video_to_file_like(
         format,
         _pybind_ops.create_file_like_context(file_like, True),  # True means for writing
         device,
-        device_variant,
         crf,
     )
 
@@ -325,7 +322,6 @@ def encode_video_to_file_abstract(
     frame_rate: int,
     filename: str,
     device: str = "cpu",
-    device_variant: str = "ffmpeg",
     crf: Optional[int] = None,
 ) -> None:
     return
@@ -337,7 +333,6 @@ def encode_video_to_tensor_abstract(
     frame_rate: int,
     format: str,
     device: str = "cpu",
-    device_variant: str = "ffmpeg",
     crf: Optional[int] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
@@ -350,7 +345,6 @@ def _encode_video_to_file_like_abstract(
     format: str,
     file_like_context: int,
     device: str = "cpu",
-    device_variant: str = "ffmpeg",
     crf: Optional[int] = None,
 ) -> None:
     return
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
@@ -5,7 +5,6 @@
 from torch import device as torch_device, Tensor
 
 from torchcodec import _core
-from torchcodec.decoders._decoder_utils import _get_cuda_backend
 
 
 class VideoEncoder:
@@ -43,14 +42,6 @@ def __init__(
         if isinstance(device, torch_device):
             device = str(device)
 
-        # Check if beta variant is being used and reject it
-        device_variant = _get_cuda_backend()
-        if "cuda" in device.lower() and device_variant == "beta":
-            raise ValueError(
-                "The beta CUDA backend is not supported for video encoding. "
-                "Please use device='cuda' without the beta backend context manager."
-            )
-
         self._frames = frames
         self._frame_rate = frame_rate
         self._device = device
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -573,12 +573,6 @@ class TestVideoEncoder:
     def decode(self, source=None) -> torch.Tensor:
         return VideoDecoder(source).get_frames_in_range(start=0, stop=60)
 
-    def save_image(self, a, b, name):
-        from torchvision.io import write_png
-        from torchvision.utils import make_grid
-        image = make_grid(torch.stack([a, b]), nrow=2).cpu()
-        write_png(image, f"{name}.png")
-
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
     def test_bad_input_parameterized(self, tmp_path, method):
         if method == "to_file":
@@ -642,15 +636,20 @@ def test_bad_input(self, tmp_path):
             encoder.to_tensor(format="bad_format")
 
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
-    def test_contiguity(self, method, tmp_path):
+    @pytest.mark.parametrize(
+        "device", ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
+    )
+    def test_contiguity(self, method, tmp_path, device):
         # Ensure that 2 sets of video frames with the same pixel values are encoded
         # in the same way, regardless of their memory layout. Here we encode 2 equal
         # frame tensors, one is contiguous while the other is non-contiguous.
 
-        num_frames, channels, height, width = 5, 3, 64, 64
-        contiguous_frames = torch.randint(
-            0, 256, size=(num_frames, channels, height, width), dtype=torch.uint8
-        ).contiguous()
+        num_frames, channels, height, width = 5, 3, 256, 256
+        contiguous_frames = (
+            (torch.rand(num_frames, channels, height, width) * 255)
+            .to(torch.uint8)
+            .contiguous()
+        )
         assert contiguous_frames.is_contiguous()
 
         # Permute NCHW to NHWC, then update the memory layout, then permute back
@@ -668,14 +667,14 @@ def test_contiguity(self, method, tmp_path):
         def encode_to_tensor(frames):
             if method == "to_file":
                 dest = str(tmp_path / "output.mp4")
-                VideoEncoder(frames, frame_rate=30).to_file(dest=dest)
+                VideoEncoder(frames, frame_rate=30, device=device).to_file(dest=dest)
                 with open(dest, "rb") as f:
                     return torch.frombuffer(f.read(), dtype=torch.uint8).clone()
             elif method == "to_tensor":
-                return VideoEncoder(frames, frame_rate=30).to_tensor(format="mp4")
+                return VideoEncoder(frames, frame_rate=30, device=device).to_tensor(format="mp4")
             elif method == "to_file_like":
                 file_like = io.BytesIO()
-                VideoEncoder(frames, frame_rate=30).to_file_like(
+                VideoEncoder(frames, frame_rate=30, device=device).to_file_like(
                     file_like, format="mp4"
                 )
                 return torch.frombuffer(file_like.getvalue(), dtype=torch.uint8)
@@ -708,30 +707,16 @@ def test_device_video_encoder(self, method, device, tmp_path):
             encoder.to_file(dest=dest)
             # Verify file was created
             assert Path(dest).exists()
-            self.save_image(
-                frames[0],
-                self.decode(Path(dest)).data[0],
-                name=f"{device}_to_file",
-            )
         elif method == "to_tensor":
             encoded = encoder.to_tensor(format="mp4")
             assert encoded.dtype == torch.uint8
             assert encoded.ndim == 1
             assert encoded.numel() > 0
-            self.save_image(
-                frames[0],
-                self.decode(encoded).data[0],
-                name=f"{device}_to_tensor",
-            )
         elif method == "to_file_like":
             file_like = io.BytesIO()
             encoder.to_file_like(file_like, format="mp4")
             encoded_bytes = file_like.getvalue()
             assert len(encoded_bytes) > 0
-            self.save_image(
-                frames[0],
-                self.decode(encoded_bytes).data[0],
-                name=f"{device}_to_file_like",
-            )
         else:
             raise ValueError(f"Unknown method: {method}")
+
diff --git a/test/test_ops.py b/test/test_ops.py
@@ -1159,7 +1159,10 @@ def decode(self, source=None) -> torch.Tensor:
         "format", ("mov", "mp4", "mkv", pytest.param("webm", marks=pytest.mark.slow))
     )
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
-    def test_video_encoder_round_trip(self, tmp_path, format, method):
+    @pytest.mark.parametrize(
+        "device", ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
+    )
+    def test_video_encoder_round_trip(self, tmp_path, format, method, device):
         # Test that decode(encode(decode(frames))) == decode(frames)
         ffmpeg_version = get_ffmpeg_major_version()
         # In FFmpeg6, the default codec's best pixel format is lossy for all container formats but webm.
@@ -1174,9 +1177,10 @@ def test_video_encoder_round_trip(self, tmp_path, format, method):
             pytest.skip("Codec for webm is not available in this FFmpeg installation.")
         source_frames = self.decode(TEST_SRC_2_720P.path).data
 
+        # Frame rate is fixed with num frames decoded
         params = dict(
-            frame_rate=30, crf=0
-        )  # Frame rate is fixed with num frames decoded
+            frame_rate=30, crf=0, device=device
+        )  
         if method == "to_file":
             encoded_path = str(tmp_path / f"encoder_output.{format}")
             encode_video_to_file(
@@ -1207,9 +1211,10 @@ def test_video_encoder_round_trip(self, tmp_path, format, method):
 
         # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
         # are within a higher tolerance.
-        if ffmpeg_version == 6:
-            assert_close = partial(assert_tensor_close_on_at_least, percentage=99)
+        if ffmpeg_version == 6 or device == "cuda":
             atol = 15
+            percentage = 98
+            assert_close = partial(assert_tensor_close_on_at_least, percentage=percentage)
         else:
             assert_close = torch.testing.assert_close
             atol = 2
@@ -1230,7 +1235,10 @@ def test_video_encoder_round_trip(self, tmp_path, format, method):
         ),
     )
     @pytest.mark.parametrize("method", ("to_tensor", "to_file_like"))
-    def test_against_to_file(self, tmp_path, format, method):
+    @pytest.mark.parametrize(
+        "device", ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
+    )
+    def test_against_to_file(self, tmp_path, format, method, device):
         # Test that to_file, to_tensor, and to_file_like produce the same results
         ffmpeg_version = get_ffmpeg_major_version()
         if format == "webm" and (
@@ -1239,7 +1247,7 @@ def test_against_to_file(self, tmp_path, format, method):
             pytest.skip("Codec for webm is not available in this FFmpeg installation.")
 
         source_frames = self.decode(TEST_SRC_2_720P.path).data
-        params = dict(frame_rate=30, crf=0)
+        params = dict(frame_rate=30, crf=0, device=device)
 
         encoded_file = tmp_path / f"output.{format}"
         encode_video_to_file(frames=source_frames, filename=str(encoded_file), **params)
@@ -1313,10 +1321,22 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, device):
             str(frame_rate),
             "-i",
             temp_raw_path,
+        ]
+
+        # Use NVENC encoder when device is CUDA and format has an NVENC codec
+        if device == "cuda":
+            if format in ("mp4", "mov", "mkv"):
+                ffmpeg_cmd.extend(["-c:v", "h264_nvenc"])
+            elif format == "webm":
+                ffmpeg_cmd.extend(["-c:v", "vp9_nvenc"])  # Use NVENC for VP9
+            # TODO-VideoEncoder: formats "flv" and "avi" should also use their respective
+            # NVENC codecs, but these formats do not auto-select them.
+
+        ffmpeg_cmd.extend([
             "-crf",
             str(crf),
             ffmpeg_encoded_path,
-        ]
+        ])
         subprocess.run(ffmpeg_cmd, check=True)
 
         # Encode with our video encoder
@@ -1347,7 +1367,10 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, device):
                 ff_frame, enc_frame, percentage=percentage, atol=2
             )
 
-    def test_to_file_like_custom_file_object(self):
+    @pytest.mark.parametrize(
+        "device", ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
+    )
+    def test_to_file_like_custom_file_object(self, device):
         """Test to_file_like with a custom file-like object that implements write and seek."""
 
         class CustomFileObject:
@@ -1366,32 +1389,54 @@ def get_encoded_data(self):
         source_frames = self.decode(TEST_SRC_2_720P.path).data
         file_like = CustomFileObject()
         encode_video_to_file_like(
-            source_frames, frame_rate=30, crf=0, format="mp4", file_like=file_like
+            source_frames, frame_rate=30, crf=0, format="mp4", file_like=file_like, device=device
         )
         decoded_samples = self.decode(file_like.get_encoded_data())
 
-        torch.testing.assert_close(
+        ffmpeg_version = get_ffmpeg_major_version()
+        if device == "cuda":
+            atol = 15
+            percentage = 98
+            assert_close = partial(assert_tensor_close_on_at_least, percentage=percentage)
+        else:
+            assert_close = torch.testing.assert_close
+            atol = 2
+
+        assert_close(
             decoded_samples.data,
             source_frames,
-            atol=2,
+            atol=atol,
             rtol=0,
         )
 
-    def test_to_file_like_real_file(self, tmp_path):
+    @pytest.mark.parametrize(
+        "device", ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
+    )
+    def test_to_file_like_real_file(self, tmp_path, device):
         """Test to_file_like with a real file opened in binary write mode."""
         source_frames = self.decode(TEST_SRC_2_720P.path).data
         file_path = tmp_path / "test_file_like.mp4"
 
         with open(file_path, "wb") as file_like:
             encode_video_to_file_like(
-                source_frames, frame_rate=30, crf=0, format="mp4", file_like=file_like
+                source_frames, frame_rate=30, crf=0, format="mp4", file_like=file_like, device=device
             )
         decoded_samples = self.decode(str(file_path))
 
-        torch.testing.assert_close(
+        # On CUDA, use the looser check (98% of pixels within atol=15) also used by test_video_encoder_round_trip; CPU keeps the strict atol=2 check.
+        ffmpeg_version = get_ffmpeg_major_version()
+        if device == "cuda":
+            atol = 15
+            percentage = 98
+            assert_close = partial(assert_tensor_close_on_at_least, percentage=percentage)
+        else:
+            assert_close = torch.testing.assert_close
+            atol = 2
+
+        assert_close(
             decoded_samples.data,
             source_frames,
-            atol=2,
+            atol=atol,
             rtol=0,
         )