From 4b275f41c38527b38c0f105be376904b99681688 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 7 Nov 2025 14:22:50 -0500 Subject: [PATCH 1/5] add crf to api, move and update tests --- src/torchcodec/encoders/_video_encoder.py | 19 +- test/test_encoders.py | 245 +++++++++++++++++++++- test/test_ops.py | 10 - 3 files changed, 261 insertions(+), 13 deletions(-) diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index e0630d012..52c7dcbbf 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -37,6 +37,7 @@ def to_file( dest: Union[str, Path], *, pixel_format: Optional[str] = None, + crf: int = None, ) -> None: """Encode frames into a file. @@ -46,12 +47,16 @@ def to_file( container format. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + crf (int, optional): Constant Rate Factor for encoding quality. Lower values + mean better quality. Valid range depends on the encoder (commonly 0-51). + Defaults to None (which will use encoder's default). """ _core.encode_video_to_file( frames=self._frames, frame_rate=self._frame_rate, filename=str(dest), pixel_format=pixel_format, + crf=crf, ) def to_tensor( @@ -59,14 +64,18 @@ def to_tensor( format: str, *, pixel_format: Optional[str] = None, + crf: int = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. Args: format (str): The container format of the encoded frames, e.g. "mp4", "mov", - "mkv", "avi", "webm", "flv", or "gif" + "mkv", "avi", "webm", "flv", etc. pixel_format (str, optional): The pixel format to encode frames into (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + crf (int, optional): Constant Rate Factor for encoding quality. Lower values + mean better quality. Valid range depends on the encoder (commonly 0-51). + Defaults to None (which will use encoder's default). Returns: Tensor: The raw encoded bytes as 4D uint8 Tensor. @@ -76,6 +85,7 @@ def to_tensor( frame_rate=self._frame_rate, format=format, pixel_format=pixel_format, + crf=crf, ) def to_file_like( @@ -84,6 +94,7 @@ def to_file_like( format: str, *, pixel_format: Optional[str] = None, + crf: int = None, ) -> None: """Encode frames into a file-like object. @@ -94,9 +105,12 @@ def to_file_like( ``write(data: bytes) -> int`` and ``seek(offset: int, whence: int = 0) -> int``. format (str): The container format of the encoded frames, e.g. "mp4", "mov", - "mkv", "avi", "webm", "flv", or "gif". + "mkv", "avi", "webm", "flv", etc. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + crf (int, optional): Constant Rate Factor for encoding quality. Lower values + mean better quality. Valid range depends on the encoder (commonly 0-51). + Defaults to None (which will use encoder's default). """ _core.encode_video_to_file_like( frames=self._frames, @@ -104,4 +118,5 @@ def to_file_like( format=format, file_like=file_like, pixel_format=pixel_format, + crf=crf, ) diff --git a/test/test_encoders.py b/test/test_encoders.py index 922b67bbb..1adc7603a 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -9,7 +9,7 @@ import pytest import torch -from torchcodec.decoders import AudioDecoder +from torchcodec.decoders import AudioDecoder, VideoDecoder from torchcodec.encoders import AudioEncoder, VideoEncoder @@ -20,7 +20,9 @@ in_fbcode, IS_WINDOWS, NASA_AUDIO_MP3, + psnr, SINE_MONO_S32, + TEST_SRC_2_720P, TestContainerFile, ) @@ -567,6 +569,9 @@ def write(self, data): class TestVideoEncoder: + def decode(self, source=None) -> torch.Tensor: + return VideoDecoder(source).get_frames_in_range(start=0, stop=60) + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_bad_input_parameterized(self, tmp_path, method): if method == "to_file": @@ -700,3 +705,241 @@ def encode_to_tensor(frames): torch.testing.assert_close( encoded_from_contiguous, encoded_from_non_contiguous, rtol=0, atol=0 ) + + @pytest.mark.parametrize( + "format", ("mov", "mp4", "mkv", pytest.param("webm", marks=pytest.mark.slow)) + ) + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) + def test_round_trip(self, tmp_path, format, method): + # Test that decode(encode(decode(frames))) == decode(frames) + ffmpeg_version = get_ffmpeg_major_version() + # In FFmpeg6, the default codec's best pixel format is lossy for all container formats but webm. + # As a result, we skip the round trip test. + if ffmpeg_version == 6 and format != "webm": + pytest.skip( + f"FFmpeg6 defaults to lossy encoding for {format}, skipping round-trip test." + ) + if format == "webm" and ( + ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) + ): + pytest.skip("Codec for webm is not available in this FFmpeg installation.") + source_frames = self.decode(TEST_SRC_2_720P.path).data + + # Frame rate is fixed with num frames decoded + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + if method == "to_file": + encoded_path = str(tmp_path / f"encoder_output.{format}") + encoder.to_file(dest=encoded_path, crf=0) + round_trip_frames = self.decode(encoded_path).data + elif method == "to_tensor": + encoded_tensor = encoder.to_tensor(format=format, crf=0) + round_trip_frames = self.decode(encoded_tensor).data + elif method == "to_file_like": + file_like = io.BytesIO() + encoder.to_file_like(file_like=file_like, format=format, crf=0) + round_trip_frames = self.decode(file_like.getvalue()).data + else: + raise ValueError(f"Unknown method: {method}") + + assert source_frames.shape == round_trip_frames.shape + assert source_frames.dtype == round_trip_frames.dtype + + # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels + # are within a higher tolerance. + if ffmpeg_version == 6: + assert_close = partial(assert_tensor_close_on_at_least, percentage=99) + atol = 15 + else: + assert_close = torch.testing.assert_close + atol = 2 + for s_frame, rt_frame in zip(source_frames, round_trip_frames): + assert psnr(s_frame, rt_frame) > 30 + assert_close(s_frame, rt_frame, atol=atol, rtol=0) + + @pytest.mark.parametrize( + "format", + ( + "mov", + "mp4", + "avi", + "mkv", + "flv", + "gif", + pytest.param("webm", marks=pytest.mark.slow), + ), + ) + @pytest.mark.parametrize("method", ("to_tensor", "to_file_like")) + def test_against_to_file(self, tmp_path, format, method): + # Test that to_file, to_tensor, and to_file_like produce the same results + ffmpeg_version = get_ffmpeg_major_version() + if format == "webm" and ( + ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) + ): + pytest.skip("Codec for webm is not available in this FFmpeg installation.") + + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + encoded_file = tmp_path / f"output.{format}" + encoder.to_file(dest=encoded_file, crf=0) + + if method == "to_tensor": + encoded_output = encoder.to_tensor(format=format, crf=0) + else: # to_file_like + file_like = io.BytesIO() + encoder.to_file_like(file_like=file_like, format=format, crf=0) + encoded_output = file_like.getvalue() + + torch.testing.assert_close( + self.decode(encoded_file).data, + self.decode(encoded_output).data, + atol=0, + rtol=0, + ) + + @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") + @pytest.mark.parametrize( + "format", + ( + "mov", + "mp4", + "avi", + "mkv", + "flv", + "gif", + pytest.param("webm", marks=pytest.mark.slow), + ), + ) + def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): + # Encode samples with our encoder and with the FFmpeg CLI, and check + # that both decoded outputs are similar + ffmpeg_version = get_ffmpeg_major_version() + if format == "webm" and ( + ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) + ): + pytest.skip("Codec for webm is not available in this FFmpeg installation.") + + source_frames = self.decode(TEST_SRC_2_720P.path).data + + # Encode with FFmpeg CLI + temp_raw_path = str(tmp_path / "temp_input.raw") + with open(temp_raw_path, "wb") as f: + f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) + + ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") + frame_rate = 30 + crf = 0 + # Some codecs (ex. MPEG4) do not support CRF. + # Flags not supported by the selected codec will be ignored. + ffmpeg_cmd = [ + "ffmpeg", + "-y", + "-f", + "rawvideo", + "-pix_fmt", + "rgb24", + "-s", + f"{source_frames.shape[3]}x{source_frames.shape[2]}", + "-r", + str(frame_rate), + "-i", + temp_raw_path, + "-crf", + str(crf), + ffmpeg_encoded_path, + ] + subprocess.run(ffmpeg_cmd, check=True) + + # Encode with our video encoder + encoder_output_path = str(tmp_path / f"encoder_output.{format}") + encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate) + encoder.to_file(dest=encoder_output_path, crf=crf) + + ffmpeg_frames = self.decode(ffmpeg_encoded_path).data + encoder_frames = self.decode(encoder_output_path).data + + assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] + + # If FFmpeg selects a codec or pixel format that uses qscale (not crf), + # the VideoEncoder outputs *slightly* different frames. + # There may be additional subtle differences in the encoder. + percentage = 94 if ffmpeg_version == 6 or format == "avi" else 99 + + # Check that PSNR between both encoded versions is high + for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): + res = psnr(ff_frame, enc_frame) + assert res > 30 + assert_tensor_close_on_at_least( + ff_frame, enc_frame, percentage=percentage, atol=2 + ) + + def test_to_file_like_custom_file_object(self): + """Test to_file_like with a custom file-like object that implements write and seek.""" + + class CustomFileObject: + def __init__(self): + self._file = io.BytesIO() + + def write(self, data): + return self._file.write(data) + + def seek(self, offset, whence=0): + return self._file.seek(offset, whence) + + def get_encoded_data(self): + return self._file.getvalue() + + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + file_like = CustomFileObject() + encoder.to_file_like(file_like, format="mp4", crf=0) + decoded_frames = self.decode(file_like.get_encoded_data()) + + torch.testing.assert_close( + decoded_frames.data, + source_frames, + atol=2, + rtol=0, + ) + + def test_to_file_like_real_file(self, tmp_path): + """Test to_file_like with a real file opened in binary write mode.""" + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + file_path = tmp_path / "test_file_like.mp4" + + with open(file_path, "wb") as file_like: + encoder.to_file_like(file_like, format="mp4", crf=0) + decoded_frames = self.decode(str(file_path)) + + torch.testing.assert_close( + decoded_frames.data, + source_frames, + atol=2, + rtol=0, + ) + + def test_to_file_like_bad_methods(self): + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + class NoWriteMethod: + def seek(self, offset, whence=0): + return 0 + + with pytest.raises( + RuntimeError, match="File like object must implement a write method" + ): + encoder.to_file_like(NoWriteMethod(), format="mp4") + + class NoSeekMethod: + def write(self, data): + return len(data) + + with pytest.raises( + RuntimeError, match="File like object must implement a seek method" + ): + encoder.to_file_like(NoSeekMethod(), format="mp4") diff --git a/test/test_ops.py b/test/test_ops.py index bb6ce601b..ac43e20ae 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -10,7 +10,6 @@ os.environ["TORCH_LOGS"] = "output_code" import json -import subprocess import numpy as np import pytest @@ -27,9 +26,6 @@ create_from_file_like, create_from_tensor, encode_audio_to_file, - encode_video_to_file, - encode_video_to_file_like, - encode_video_to_tensor, get_ffmpeg_library_versions, get_frame_at_index, get_frame_at_pts, @@ -42,24 +38,18 @@ get_next_frame, seek_to_pts, ) -from torchcodec.decoders import VideoDecoder from .utils import ( all_supported_devices, assert_frames_equal, - assert_tensor_close_on_at_least, - get_ffmpeg_major_version, in_fbcode, - IS_WINDOWS, NASA_AUDIO, NASA_AUDIO_MP3, NASA_VIDEO, needs_cuda, - psnr, SINE_MONO_S32, SINE_MONO_S32_44100, SINE_MONO_S32_8000, - TEST_SRC_2_720P, unsplit_device_str, ) From 740a675f62f8e542b4ff4121f90329f8e3085118 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 7 Nov 2025 14:35:40 -0500 Subject: [PATCH 2/5] set crf as optional --- src/torchcodec/encoders/_video_encoder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index 52c7dcbbf..1d0a05977 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -37,7 +37,7 @@ def to_file( dest: Union[str, Path], *, pixel_format: Optional[str] = None, - crf: int = None, + crf: Optional[int] = None, ) -> None: """Encode frames into a file. @@ -64,7 +64,7 @@ def to_tensor( format: str, *, pixel_format: Optional[str] = None, - crf: int = None, + crf: Optional[int] = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. @@ -94,7 +94,7 @@ def to_file_like( format: str, *, pixel_format: Optional[str] = None, - crf: int = None, + crf: Optional[int] = None, ) -> None: """Encode frames into a file-like object. From 54a82943653e8403d8675e43c99e230bcd790fc7 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 10 Nov 2025 10:21:33 -0500 Subject: [PATCH 3/5] move tests --- test/test_ops.py | 277 ----------------------------------------------- 1 file changed, 277 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index ac43e20ae..3bd46c4e3 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1141,282 +1141,5 @@ def test_bad_input(self, tmp_path): ) -class TestVideoEncoderOps: - def decode(self, source=None) -> torch.Tensor: - return VideoDecoder(source).get_frames_in_range(start=0, stop=60) - - @pytest.mark.parametrize( - "format", ("mov", "mp4", "mkv", pytest.param("webm", marks=pytest.mark.slow)) - ) - @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) - def test_video_encoder_round_trip(self, tmp_path, format, method): - # Test that decode(encode(decode(frames))) == decode(frames) - ffmpeg_version = get_ffmpeg_major_version() - if format == "webm" and ( - ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) - ): - pytest.skip("Codec for webm is not available in this FFmpeg installation.") - source_frames = self.decode(TEST_SRC_2_720P.path).data - - # Frame rate is fixed with num frames decoded - params = dict(frame_rate=30, pixel_format="yuv444p", crf=0) - if method == "to_file": - encoded_path = str(tmp_path / f"encoder_output.{format}") - encode_video_to_file( - frames=source_frames, - filename=encoded_path, - **params, - ) - round_trip_frames = self.decode(encoded_path).data - elif method == "to_tensor": - encoded_tensor = encode_video_to_tensor( - source_frames, format=format, **params - ) - round_trip_frames = self.decode(encoded_tensor).data - elif method == "to_file_like": - file_like = io.BytesIO() - encode_video_to_file_like( - frames=source_frames, - format=format, - file_like=file_like, - **params, - ) - round_trip_frames = self.decode(file_like.getvalue()).data - else: - raise ValueError(f"Unknown method: {method}") - - assert source_frames.shape == round_trip_frames.shape - assert source_frames.dtype == round_trip_frames.dtype - - # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels - # are within a higher tolerance. - if ffmpeg_version == 6: - assert_close = partial(assert_tensor_close_on_at_least, percentage=99) - atol = 15 - else: - assert_close = torch.testing.assert_close - atol = 3 if format == "webm" else 2 - for s_frame, rt_frame in zip(source_frames, round_trip_frames): - assert psnr(s_frame, rt_frame) > 30 - assert_close(s_frame, rt_frame, atol=atol, rtol=0) - - @pytest.mark.parametrize( - "format", - ( - "mov", - "mp4", - "avi", - "mkv", - "flv", - "gif", - pytest.param("webm", marks=pytest.mark.slow), - ), - ) - @pytest.mark.parametrize("method", ("to_tensor", "to_file_like")) - def test_against_to_file(self, tmp_path, format, method): - # Test that to_file, to_tensor, and to_file_like produce the same results - ffmpeg_version = get_ffmpeg_major_version() - if format == "webm" and ( - ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) - ): - pytest.skip("Codec for webm is not available in this FFmpeg installation.") - - source_frames = self.decode(TEST_SRC_2_720P.path).data - params = dict(frame_rate=30, crf=0) - - encoded_file = tmp_path / f"output.{format}" - encode_video_to_file(frames=source_frames, filename=str(encoded_file), **params) - - if method == "to_tensor": - encoded_output = encode_video_to_tensor( - source_frames, format=format, **params - ) - else: # to_file_like - file_like = io.BytesIO() - encode_video_to_file_like( - frames=source_frames, - file_like=file_like, - format=format, - **params, - ) - encoded_output = file_like.getvalue() - - torch.testing.assert_close( - self.decode(encoded_file).data, - self.decode(encoded_output).data, - atol=0, - rtol=0, - ) - - @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") - @pytest.mark.parametrize( - "format", - ( - "mov", - "mp4", - "avi", - "mkv", - "flv", - pytest.param("webm", marks=pytest.mark.slow), - ), - ) - @pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p")) - def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format): - ffmpeg_version = get_ffmpeg_major_version() - if format == "webm" and ( - ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) - ): - pytest.skip("Codec for webm is not available in this FFmpeg installation.") - if format in ("avi", "flv") and pixel_format == "yuv444p": - pytest.skip(f"Default codec for {format} does not support {pixel_format}") - - source_frames = self.decode(TEST_SRC_2_720P.path).data - - # Encode with FFmpeg CLI - temp_raw_path = str(tmp_path / "temp_input.raw") - with open(temp_raw_path, "wb") as f: - f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) - - ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") - frame_rate = 30 - crf = 0 - # Some codecs (ex. MPEG4) do not support CRF. - # Flags not supported by the selected codec will be ignored. - ffmpeg_cmd = [ - "ffmpeg", - "-y", - "-f", - "rawvideo", - "-pix_fmt", - "rgb24", # Input format - "-s", - f"{source_frames.shape[3]}x{source_frames.shape[2]}", - "-r", - str(frame_rate), - "-i", - temp_raw_path, - "-pix_fmt", - pixel_format, # Output format - "-crf", - str(crf), - ffmpeg_encoded_path, - ] - subprocess.run(ffmpeg_cmd, check=True) - - # Encode with our video encoder - encoder_output_path = str(tmp_path / f"encoder_output.{format}") - encode_video_to_file( - frames=source_frames, - frame_rate=frame_rate, - filename=encoder_output_path, - pixel_format=pixel_format, - crf=crf, - ) - - ffmpeg_frames = self.decode(ffmpeg_encoded_path).data - encoder_frames = self.decode(encoder_output_path).data - - assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] - - # If FFmpeg selects a codec or pixel format that uses qscale (not crf), - # the VideoEncoder outputs *slightly* different frames. - # There may be additional subtle differences in the encoder. - percentage = 94 if ffmpeg_version == 6 or format == "avi" else 99 - - # Check that PSNR between both encoded versions is high - for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): - res = psnr(ff_frame, enc_frame) - assert res > 30 - assert_tensor_close_on_at_least( - ff_frame, enc_frame, percentage=percentage, atol=2 - ) - - def test_to_file_like_custom_file_object(self): - """Test to_file_like with a custom file-like object that implements write and seek.""" - - class CustomFileObject: - def __init__(self): - self._file = io.BytesIO() - - def write(self, data): - return self._file.write(data) - - def seek(self, offset, whence=0): - return self._file.seek(offset, whence) - - def get_encoded_data(self): - return self._file.getvalue() - - source_frames = self.decode(TEST_SRC_2_720P.path).data - file_like = CustomFileObject() - encode_video_to_file_like( - source_frames, - frame_rate=30, - pixel_format="yuv444p", - crf=0, - format="mp4", - file_like=file_like, - ) - decoded_samples = self.decode(file_like.get_encoded_data()) - - torch.testing.assert_close( - decoded_samples.data, - source_frames, - atol=2, - rtol=0, - ) - - def test_to_file_like_real_file(self, tmp_path): - """Test to_file_like with a real file opened in binary write mode.""" - source_frames = self.decode(TEST_SRC_2_720P.path).data - file_path = tmp_path / "test_file_like.mp4" - - with open(file_path, "wb") as file_like: - encode_video_to_file_like( - source_frames, - frame_rate=30, - pixel_format="yuv444p", - crf=0, - format="mp4", - file_like=file_like, - ) - decoded_samples = self.decode(str(file_path)) - - torch.testing.assert_close( - decoded_samples.data, - source_frames, - atol=2, - rtol=0, - ) - - def test_to_file_like_bad_methods(self): - source_frames = self.decode(TEST_SRC_2_720P.path).data - - class NoWriteMethod: - def seek(self, offset, whence=0): - return 0 - - with pytest.raises( - RuntimeError, match="File like object must implement a write method" - ): - encode_video_to_file_like( - source_frames, - frame_rate=30, - format="mp4", - file_like=NoWriteMethod(), - ) - - class NoSeekMethod: - def write(self, data): - return len(data) - - with pytest.raises( - RuntimeError, match="File like object must implement a seek method" - ): - encode_video_to_file_like( - source_frames, frame_rate=30, format="mp4", file_like=NoSeekMethod() - ) - - if __name__ == "__main__": pytest.main() From 8fcc181246496b505c95c64ca7150998a98f6c18 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 10 Nov 2025 10:59:28 -0500 Subject: [PATCH 4/5] integrate pixel_format test changes --- test/test_encoders.py | 44 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/test/test_encoders.py b/test/test_encoders.py index 1adc7603a..f23994fe0 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -713,12 +713,6 @@ def encode_to_tensor(frames): def test_round_trip(self, tmp_path, format, method): # Test that decode(encode(decode(frames))) == decode(frames) ffmpeg_version = get_ffmpeg_major_version() - # In FFmpeg6, the default codec's best pixel format is lossy for all container formats but webm. - # As a result, we skip the round trip test. - if ffmpeg_version == 6 and format != "webm": - pytest.skip( - f"FFmpeg6 defaults to lossy encoding for {format}, skipping round-trip test." - ) if format == "webm" and ( ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) ): @@ -730,14 +724,18 @@ def test_round_trip(self, tmp_path, format, method): if method == "to_file": encoded_path = str(tmp_path / f"encoder_output.{format}") - encoder.to_file(dest=encoded_path, crf=0) + encoder.to_file(dest=encoded_path, pixel_format="yuv444p", crf=0) round_trip_frames = self.decode(encoded_path).data elif method == "to_tensor": - encoded_tensor = encoder.to_tensor(format=format, crf=0) + encoded_tensor = encoder.to_tensor( + format=format, pixel_format="yuv444p", crf=0 + ) round_trip_frames = self.decode(encoded_tensor).data elif method == "to_file_like": file_like = io.BytesIO() - encoder.to_file_like(file_like=file_like, format=format, crf=0) + encoder.to_file_like( + file_like=file_like, format=format, pixel_format="yuv444p", crf=0 + ) round_trip_frames = self.decode(file_like.getvalue()).data else: raise ValueError(f"Unknown method: {method}") @@ -745,17 +743,9 @@ def test_round_trip(self, tmp_path, format, method): assert source_frames.shape == round_trip_frames.shape assert source_frames.dtype == round_trip_frames.dtype - # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels - # are within a higher tolerance. - if ffmpeg_version == 6: - assert_close = partial(assert_tensor_close_on_at_least, percentage=99) - atol = 15 - else: - assert_close = torch.testing.assert_close - atol = 2 for s_frame, rt_frame in zip(source_frames, round_trip_frames): assert psnr(s_frame, rt_frame) > 30 - assert_close(s_frame, rt_frame, atol=atol, rtol=0) + torch.testing.assert_close(s_frame, rt_frame, atol=2, rtol=0) @pytest.mark.parametrize( "format", @@ -807,18 +797,18 @@ def test_against_to_file(self, tmp_path, format, method): "avi", "mkv", "flv", - "gif", pytest.param("webm", marks=pytest.mark.slow), ), ) - def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): - # Encode samples with our encoder and with the FFmpeg CLI, and check - # that both decoded outputs are similar + @pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p")) + def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format): ffmpeg_version = get_ffmpeg_major_version() if format == "webm" and ( ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) ): pytest.skip("Codec for webm is not available in this FFmpeg installation.") + if format in ("avi", "flv") and pixel_format == "yuv444p": + pytest.skip(f"Default codec for {format} does not support {pixel_format}") source_frames = self.decode(TEST_SRC_2_720P.path).data @@ -838,13 +828,15 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): "-f", "rawvideo", "-pix_fmt", - "rgb24", + "rgb24", # Input format "-s", f"{source_frames.shape[3]}x{source_frames.shape[2]}", "-r", str(frame_rate), "-i", temp_raw_path, + "-pix_fmt", + pixel_format, # Output format "-crf", str(crf), ffmpeg_encoded_path, @@ -854,7 +846,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): # Encode with our video encoder encoder_output_path = str(tmp_path / f"encoder_output.{format}") encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate) - encoder.to_file(dest=encoder_output_path, crf=crf) + encoder.to_file(dest=encoder_output_path, pixel_format=pixel_format, crf=crf) ffmpeg_frames = self.decode(ffmpeg_encoded_path).data encoder_frames = self.decode(encoder_output_path).data @@ -894,7 +886,7 @@ def get_encoded_data(self): encoder = VideoEncoder(frames=source_frames, frame_rate=30) file_like = CustomFileObject() - encoder.to_file_like(file_like, format="mp4", crf=0) + encoder.to_file_like(file_like, format="mp4", pixel_format="yuv444p", crf=0) decoded_frames = self.decode(file_like.get_encoded_data()) torch.testing.assert_close( @@ -912,7 +904,7 @@ def test_to_file_like_real_file(self, tmp_path): file_path = tmp_path / "test_file_like.mp4" with open(file_path, "wb") as file_like: - encoder.to_file_like(file_like, format="mp4", crf=0) + encoder.to_file_like(file_like, format="mp4", pixel_format="yuv444p", crf=0) decoded_frames = self.decode(str(file_path)) torch.testing.assert_close( From d1e5bdf8dbd10e208f671d100dbefde1866c709d Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 10 Nov 2025 14:49:55 -0500 Subject: [PATCH 5/5] add webm tolerance --- test/test_encoders.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_encoders.py b/test/test_encoders.py index f23994fe0..3e245cac8 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -858,12 +858,13 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format): # There may be additional subtle differences in the encoder. percentage = 94 if ffmpeg_version == 6 or format == "avi" else 99 + atol = 3 if format == "webm" else 2 # Check that PSNR between both encoded versions is high for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): res = psnr(ff_frame, enc_frame) assert res > 30 assert_tensor_close_on_at_least( - ff_frame, enc_frame, percentage=percentage, atol=2 + ff_frame, enc_frame, percentage=percentage, atol=atol ) def test_to_file_like_custom_file_object(self):