-
Notifications
You must be signed in to change notification settings - Fork 72
Add pixel_format to VideoEncoder API #1027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
d75f0eb
4ff25f6
7ef8a8f
dc86a8c
884f4dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -213,6 +213,7 @@ def encode_video_to_file_like( | |
| format: str, | ||
| file_like: Union[io.RawIOBase, io.BufferedIOBase], | ||
| crf: Optional[int] = None, | ||
| pixel_format: Optional[str] = None, | ||
| ) -> None: | ||
| """Encode video frames to a file-like object. | ||
|
|
||
|
|
@@ -222,6 +223,7 @@ def encode_video_to_file_like( | |
| format: Video format (e.g., "mp4", "mov", "mkv") | ||
| file_like: File-like object that supports write() and seek() methods | ||
| crf: Optional constant rate factor for encoding quality | ||
| pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p") | ||
| """ | ||
| assert _pybind_ops is not None | ||
|
|
||
|
|
@@ -230,6 +232,7 @@ def encode_video_to_file_like( | |
| frame_rate, | ||
| format, | ||
| _pybind_ops.create_file_like_context(file_like, True), # True means for writing | ||
| pixel_format, | ||
| crf, | ||
| ) | ||
|
|
||
|
|
@@ -319,6 +322,7 @@ def encode_video_to_file_abstract( | |
| frame_rate: int, | ||
| filename: str, | ||
| crf: Optional[int], | ||
| pixel_format: Optional[str], | ||
|
||
| ) -> None: | ||
| return | ||
|
|
||
|
|
@@ -329,6 +333,7 @@ def encode_video_to_tensor_abstract( | |
| frame_rate: int, | ||
| format: str, | ||
| crf: Optional[int], | ||
| pixel_format: Optional[str], | ||
|
||
| ) -> torch.Tensor: | ||
| return torch.empty([], dtype=torch.long) | ||
|
|
||
|
|
@@ -340,6 +345,7 @@ def _encode_video_to_file_like_abstract( | |
| format: str, | ||
| file_like_context: int, | ||
| crf: Optional[int] = None, | ||
| pixel_format: Optional[str] = None, | ||
| ) -> None: | ||
| return | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| from pathlib import Path | ||
| from typing import Union | ||
| from typing import Optional, Union | ||
|
|
||
| import torch | ||
| from torch import Tensor | ||
|
|
@@ -35,29 +35,38 @@ def __init__(self, frames: Tensor, *, frame_rate: int): | |
| def to_file( | ||
| self, | ||
| dest: Union[str, Path], | ||
| *, | ||
| pixel_format: Optional[str] = None, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good job on making this a keyword-only parameter 👍 |
||
| ) -> None: | ||
| """Encode frames into a file. | ||
|
|
||
| Args: | ||
| dest (str or ``pathlib.Path``): The path to the output file, e.g. | ||
| ``video.mp4``. The extension of the file determines the video | ||
| container format. | ||
| pixel_format (str, optional): The pixel format for encoding (e.g., | ||
| "yuv420p", "yuv444p"). If not specified, uses codec's default format. | ||
| """ | ||
| _core.encode_video_to_file( | ||
| frames=self._frames, | ||
| frame_rate=self._frame_rate, | ||
| filename=str(dest), | ||
| pixel_format=pixel_format, | ||
| ) | ||
|
|
||
| def to_tensor( | ||
| self, | ||
| format: str, | ||
| *, | ||
| pixel_format: Optional[str] = None, | ||
| ) -> Tensor: | ||
| """Encode frames into raw bytes, as a 1D uint8 Tensor. | ||
|
|
||
| Args: | ||
| format (str): The container format of the encoded frames, e.g. "mp4", "mov", | ||
| "mkv", "avi", "webm", "flv", or "gif" | ||
| pixel_format (str, optional): The pixel format to encode frames into (e.g., | ||
| "yuv420p", "yuv444p"). If not specified, uses codec's default format. | ||
|
|
||
| Returns: | ||
| Tensor: The raw encoded bytes as a 1D uint8 Tensor. | ||
|
|
@@ -66,12 +75,15 @@ def to_tensor( | |
| frames=self._frames, | ||
| frame_rate=self._frame_rate, | ||
| format=format, | ||
| pixel_format=pixel_format, | ||
| ) | ||
|
|
||
| def to_file_like( | ||
| self, | ||
| file_like, | ||
| format: str, | ||
| *, | ||
| pixel_format: Optional[str] = None, | ||
| ) -> None: | ||
| """Encode frames into a file-like object. | ||
|
|
||
|
|
@@ -83,10 +95,13 @@ def to_file_like( | |
| int = 0) -> int``. | ||
| format (str): The container format of the encoded frames, e.g. "mp4", "mov", | ||
| "mkv", "avi", "webm", "flv", or "gif". | ||
| pixel_format (str, optional): The pixel format for encoding (e.g., | ||
| "yuv420p", "yuv444p"). If not specified, uses codec's default format. | ||
| """ | ||
| _core.encode_video_to_file_like( | ||
| frames=self._frames, | ||
| frame_rate=self._frame_rate, | ||
| format=format, | ||
| file_like=file_like, | ||
| pixel_format=pixel_format, | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1162,21 +1162,14 @@ def decode(self, source=None) -> torch.Tensor: | |
| def test_video_encoder_round_trip(self, tmp_path, format, method): | ||
| # Test that decode(encode(decode(frames))) == decode(frames) | ||
| ffmpeg_version = get_ffmpeg_major_version() | ||
| # In FFmpeg6, the default codec's best pixel format is lossy for all container formats but webm. | ||
| # As a result, we skip the round trip test. | ||
| if ffmpeg_version == 6 and format != "webm": | ||
| pytest.skip( | ||
| f"FFmpeg6 defaults to lossy encoding for {format}, skipping round-trip test." | ||
| ) | ||
| if format == "webm" and ( | ||
| ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) | ||
| ): | ||
| pytest.skip("Codec for webm is not available in this FFmpeg installation.") | ||
| source_frames = self.decode(TEST_SRC_2_720P.path).data | ||
|
|
||
| params = dict( | ||
| frame_rate=30, crf=0 | ||
| ) # Frame rate is fixed with num frames decoded | ||
| # Frame rate is fixed with num frames decoded | ||
| params = dict(frame_rate=30, pixel_format="yuv444p", crf=0) | ||
| if method == "to_file": | ||
| encoded_path = str(tmp_path / f"encoder_output.{format}") | ||
| encode_video_to_file( | ||
|
|
@@ -1212,7 +1205,7 @@ def test_video_encoder_round_trip(self, tmp_path, format, method): | |
| atol = 15 | ||
| else: | ||
| assert_close = torch.testing.assert_close | ||
| atol = 2 | ||
| atol = 3 if format == "webm" else 2 | ||
| for s_frame, rt_frame in zip(source_frames, round_trip_frames): | ||
| assert psnr(s_frame, rt_frame) > 30 | ||
| assert_close(s_frame, rt_frame, atol=atol, rtol=0) | ||
|
|
@@ -1274,16 +1267,18 @@ def test_against_to_file(self, tmp_path, format, method): | |
| "avi", | ||
| "mkv", | ||
| "flv", | ||
| "gif", | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| pytest.param("webm", marks=pytest.mark.slow), | ||
| ), | ||
| ) | ||
| def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): | ||
| @pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p")) | ||
| def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format): | ||
| ffmpeg_version = get_ffmpeg_major_version() | ||
| if format == "webm" and ( | ||
| ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) | ||
| ): | ||
| pytest.skip("Codec for webm is not available in this FFmpeg installation.") | ||
| if format in ("avi", "flv") and pixel_format == "yuv444p": | ||
| pytest.skip(f"Default codec for {format} does not support {pixel_format}") | ||
|
|
||
| source_frames = self.decode(TEST_SRC_2_720P.path).data | ||
|
|
||
|
|
@@ -1303,13 +1298,15 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): | |
| "-f", | ||
| "rawvideo", | ||
| "-pix_fmt", | ||
| "rgb24", | ||
| "rgb24", # Input format | ||
| "-s", | ||
| f"{source_frames.shape[3]}x{source_frames.shape[2]}", | ||
| "-r", | ||
| str(frame_rate), | ||
| "-i", | ||
| temp_raw_path, | ||
| "-pix_fmt", | ||
| pixel_format, # Output format | ||
| "-crf", | ||
| str(crf), | ||
| ffmpeg_encoded_path, | ||
|
|
@@ -1322,6 +1319,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): | |
| frames=source_frames, | ||
| frame_rate=frame_rate, | ||
| filename=encoder_output_path, | ||
| pixel_format=pixel_format, | ||
| crf=crf, | ||
| ) | ||
|
|
||
|
|
@@ -1362,7 +1360,12 @@ def get_encoded_data(self): | |
| source_frames = self.decode(TEST_SRC_2_720P.path).data | ||
| file_like = CustomFileObject() | ||
| encode_video_to_file_like( | ||
| source_frames, frame_rate=30, crf=0, format="mp4", file_like=file_like | ||
| source_frames, | ||
| frame_rate=30, | ||
| pixel_format="yuv444p", | ||
| crf=0, | ||
| format="mp4", | ||
| file_like=file_like, | ||
| ) | ||
| decoded_samples = self.decode(file_like.get_encoded_data()) | ||
|
|
||
|
|
@@ -1380,7 +1383,12 @@ def test_to_file_like_real_file(self, tmp_path): | |
|
|
||
| with open(file_path, "wb") as file_like: | ||
| encode_video_to_file_like( | ||
| source_frames, frame_rate=30, crf=0, format="mp4", file_like=file_like | ||
| source_frames, | ||
| frame_rate=30, | ||
| pixel_format="yuv444p", | ||
| crf=0, | ||
| format="mp4", | ||
| file_like=file_like, | ||
| ) | ||
| decoded_samples = self.decode(str(file_path)) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`getSupportedPixelFormats` is not guaranteed to return any formats. If the user does not specify a format and we find none, I think we should try to use the broadly supported `yuv420p`, rather than error out. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed, this makes sense and that's similar to what we do for the audio encoder when we can't validate:
torchcodec/src/torchcodec/_core/Encoder.cpp
Lines 85 to 87 in 8e615e3