pass device around, basic function test

Daniel Flores · Dan-Flores · commit 144c394cb92a · 2025-10-29T20:38:51.000Z
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, int? crf=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str device=\"cpu\", str device_variant=\"ffmpeg\", int? crf=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, int? crf=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str device=\"cpu\", str device_variant=\"ffmpeg\", int? crf=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, int? crf=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str device=\"cpu\", str device_variant=\"ffmpeg\",int? crf=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -603,9 +603,15 @@ void encode_video_to_file(
     const at::Tensor& frames,
     int64_t frame_rate,
     std::string_view file_name,
+    std::string_view device = "cpu",
+    std::string_view device_variant = "ffmpeg",
     std::optional<int64_t> crf = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.crf = crf;
+
+  validateDeviceInterface(std::string(device), std::string(device_variant));
+  videoStreamOptions.device = torch::Device(std::string(device));
+  videoStreamOptions.deviceVariant = device_variant;
   VideoEncoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
@@ -618,10 +624,16 @@ at::Tensor encode_video_to_tensor(
     const at::Tensor& frames,
     int64_t frame_rate,
     std::string_view format,
+    std::string_view device = "cpu",
+    std::string_view device_variant = "ffmpeg",
     std::optional<int64_t> crf = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.crf = crf;
+
+  validateDeviceInterface(std::string(device), std::string(device_variant));
+  videoStreamOptions.device = torch::Device(std::string(device));
+  videoStreamOptions.deviceVariant = device_variant;
   return VideoEncoder(
              frames,
              validateInt64ToInt(frame_rate, "frame_rate"),
@@ -636,6 +648,8 @@ void _encode_video_to_file_like(
     int64_t frame_rate,
     std::string_view format,
     int64_t file_like_context,
+    std::string_view device = "cpu",
+    std::string_view device_variant = "ffmpeg",
     std::optional<int64_t> crf = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
@@ -646,6 +660,10 @@ void _encode_video_to_file_like(
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.crf = crf;
 
+  validateDeviceInterface(std::string(device), std::string(device_variant));
+  videoStreamOptions.device = torch::Device(std::string(device));
+  videoStreamOptions.deviceVariant = device_variant;
+
   VideoEncoder encoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -212,6 +212,8 @@ def encode_video_to_file_like(
     frame_rate: int,
     format: str,
     file_like: Union[io.RawIOBase, io.BufferedIOBase],
+    device: str = "cpu",
+    device_variant: str = "ffmpeg",
     crf: Optional[int] = None,
 ) -> None:
     """Encode video frames to a file-like object.
@@ -221,6 +223,8 @@ def encode_video_to_file_like(
         frame_rate: Frame rate in frames per second
         format: Video format (e.g., "mp4", "mov", "mkv")
         file_like: File-like object that supports write() and seek() methods
+        device: Device to use for encoding (default: "cpu")
+        device_variant: Variant of the device interface to use for encoding (default: "ffmpeg")
         crf: Optional constant rate factor for encoding quality
     """
     assert _pybind_ops is not None
@@ -230,6 +234,8 @@ def encode_video_to_file_like(
         frame_rate,
         format,
         _pybind_ops.create_file_like_context(file_like, True),  # True means for writing
+        device,
+        device_variant,
         crf,
     )
 
@@ -318,7 +324,9 @@ def encode_video_to_file_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     filename: str,
-    crf: Optional[int],
+    device: str = "cpu",
+    device_variant: str = "ffmpeg",
+    crf: Optional[int] = None,
 ) -> None:
     return
 
@@ -328,7 +336,9 @@ def encode_video_to_tensor_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     format: str,
-    crf: Optional[int],
+    device: str = "cpu",
+    device_variant: str = "ffmpeg",
+    crf: Optional[int] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
 
@@ -339,6 +349,8 @@ def _encode_video_to_file_like_abstract(
     frame_rate: int,
     format: str,
     file_like_context: int,
+    device: str = "cpu",
+    device_variant: str = "ffmpeg",
     crf: Optional[int] = None,
 ) -> None:
     return
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
@@ -1,10 +1,11 @@
 from pathlib import Path
-from typing import Union
+from typing import Optional, Union
 
 import torch
-from torch import Tensor
+from torch import device as torch_device, Tensor
 
 from torchcodec import _core
+from torchcodec.decoders._decoder_utils import _get_cuda_backend
 
 
 class VideoEncoder:
@@ -16,6 +17,9 @@ class VideoEncoder:
             C is 3 channels (RGB), H is height, and W is width.
             Values must be uint8 in the range ``[0, 255]``.
         frame_rate (int): The frame rate of the **input** ``frames``. Also defines the encoded **output** frame rate.
+        device (str or torch.device, optional): The device to use for encoding. Default: "cpu".
+            If you pass a CUDA device, frames will be encoded on GPU.
+            Note: The "beta" CUDA backend is not supported for encoding.
     """
 
     def __init__(self, frames: Tensor, *, frame_rate: int):
@@ -29,8 +33,21 @@ def __init__(self, frames: Tensor, *, frame_rate: int):
         if frame_rate <= 0:
             raise ValueError(f"{frame_rate = } must be > 0.")
 
+        # Validate and store device
+        if isinstance(device, torch_device):
+            device = str(device)
+
+        # Check if beta variant is being used and reject it
+        device_variant = _get_cuda_backend()
+        if "cuda" in device.lower() and device_variant == "beta":
+            raise ValueError(
+                "The beta CUDA backend is not supported for video encoding. "
+                "Please use device='cuda' without the beta backend context manager."
+            )
+
         self._frames = frames
         self._frame_rate = frame_rate
+        self._device = device
 
     def to_file(
         self,
@@ -47,6 +64,7 @@ def to_file(
             frames=self._frames,
             frame_rate=self._frame_rate,
             filename=str(dest),
+            device=self._device,
         )
 
     def to_tensor(
@@ -66,6 +84,7 @@ def to_tensor(
             frames=self._frames,
             frame_rate=self._frame_rate,
             format=format,
+            device=self._device,
         )
 
     def to_file_like(
@@ -89,4 +108,5 @@ def to_file_like(
             frame_rate=self._frame_rate,
             format=format,
             file_like=file_like,
+            device=self._device,
         )
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -676,3 +676,33 @@ def encode_to_tensor(frames):
         torch.testing.assert_close(
             encoded_from_contiguous, encoded_from_non_contiguous, rtol=0, atol=0
         )
+
+    @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
+    @pytest.mark.parametrize(
+        "device", ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
+    )
+    def test_device_video_encoder(self, method, device, tmp_path):
+        # Test that encoding works on CPU and CUDA devices
+        num_frames, channels, height, width = 5, 3, 64, 64
+        frames = (torch.rand(num_frames, channels, height, width) * 255).to(torch.uint8)
+
+        encoder = VideoEncoder(frames, frame_rate=30, device=device)
+
+        if method == "to_file":
+            dest = str(tmp_path / "output.mp4")
+            encoder.to_file(dest=dest)
+            # Verify file was created
+            assert Path(dest).exists()
+        elif method == "to_tensor":
+            encoded = encoder.to_tensor(format="mp4")
+            assert encoded.dtype == torch.uint8
+            assert encoded.ndim == 1
+            assert encoded.numel() > 0
+        elif method == "to_file_like":
+            file_like = io.BytesIO()
+            encoder.to_file_like(file_like, format="mp4")
+            encoded_bytes = file_like.getvalue()
+            assert len(encoded_bytes) > 0
+        else:
+            raise ValueError(f"Unknown method: {method}")
+class VideoEncoder
diff --git a/test/test_ops.py b/test/test_ops.py
@@ -1375,8 +1375,6 @@ def get_encoded_data(self):
 
     def test_to_file_like_real_file(self, tmp_path):
         """Test to_file_like with a real file opened in binary write mode."""
-        if get_ffmpeg_major_version() == 6:
-            pytest.skip("Skipping round trip test for FFmpeg 6")
         source_frames = self.decode(TEST_SRC_2_720P.path).data
         file_path = tmp_path / "test_file_like.mp4"