Skip to content

Commit c4bca2d

Browse files
authored
Refactor gen resources (#984)
1 parent 1befed7 commit c4bca2d

File tree

2 files changed

+171
-161
lines changed

2 files changed

+171
-161
lines changed

test/generate_reference_resources.py

Lines changed: 47 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,20 @@
66

77
import subprocess
88
from pathlib import Path
9+
from typing import Optional
910

1011
import numpy as np
1112

1213
import torch
1314
from PIL import Image
1415

15-
from .utils import sanitize_filtergraph_expression
16+
from .utils import AV1_VIDEO, H265_VIDEO, NASA_VIDEO, TestVideo
1617

1718
# Run this script to update the resources used in unit tests. The resources are all derived
1819
# from source media already checked into the repo.
1920

20-
SCRIPT_DIR = Path(__file__).resolve().parent
21-
TORCHCODEC_PATH = SCRIPT_DIR.parent
22-
RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources"
2321

24-
25-
def convert_image_to_tensor(image_path):
22+
def convert_image_to_tensor(image_path: str) -> None:
2623
image_path = Path(image_path)
2724
if not image_path.exists():
2825
return
@@ -37,37 +34,56 @@ def convert_image_to_tensor(image_path):
3734
image_path.unlink()
3835

3936

40-
def get_frame_by_index(video_path, frame, output_path, stream, filters=None):
37+
def generate_frame_by_index(
38+
video: TestVideo,
39+
*,
40+
frame_index: int,
41+
stream_index: int,
42+
filters: Optional[str] = None,
43+
) -> None:
44+
# Note that we are using 0-based index naming. As a result, we are
45+
# generating files one-by-one, giving the actual file name that we want.
46+
# ffmpeg does have an option to generate multiple files for us, but it uses
47+
# 1-based indexing. We can't use 1-based indexing because we want to match
48+
# the 0-based indexing in our tests.
49+
base_path = video.get_base_path_by_index(
50+
frame_index, stream_index=stream_index, filters=filters
51+
)
52+
output_bmp = f"{base_path}.bmp"
53+
4154
# Note that we have an explicit format conversion to rgb24 in our filtergraph specification,
4255
# which always happens BEFORE any of the filters that we receive as input. We do this to
4356
# ensure that the color conversion happens BEFORE the filters, matching the behavior of the
4457
# torchcodec filtergraph implementation.
4558
#
4659
# Not doing this would result in the color conversion happening AFTER the filters, which
4760
# would result in different color values for the same frame.
48-
filtergraph = f"select='eq(n\\,{frame})',format=rgb24"
61+
filtergraph = f"select='eq(n\\,{frame_index})',format=rgb24"
4962
if filters is not None:
5063
filtergraph = filtergraph + f",{filters}"
5164

5265
cmd = [
5366
"ffmpeg",
5467
"-y",
5568
"-i",
56-
video_path,
69+
video.path,
5770
"-map",
58-
f"0:{stream}",
71+
f"0:{stream_index}",
5972
"-vf",
6073
filtergraph,
6174
"-fps_mode",
6275
"passthrough",
6376
"-update",
6477
"1",
65-
output_path,
78+
output_bmp,
6679
]
6780
subprocess.run(cmd, check=True)
81+
convert_image_to_tensor(output_bmp)
6882

6983

70-
def get_frame_by_timestamp(video_path, timestamp, output_path):
84+
def generate_frame_by_timestamp(
85+
video_path: str, timestamp: float, output_path: str
86+
) -> None:
7187
cmd = [
7288
"ffmpeg",
7389
"-y",
@@ -80,40 +96,32 @@ def get_frame_by_timestamp(video_path, timestamp, output_path):
8096
output_path,
8197
]
8298
subprocess.run(cmd, check=True)
99+
convert_image_to_tensor(output_path)
83100

84101

85102
def generate_nasa_13013_references():
86-
VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4"
87-
88103
# Note: The naming scheme used here must match the naming scheme used to load
89104
# tensors in ./utils.py.
90-
STREAMS = [0, 3]
91-
FRAMES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
92-
for stream in STREAMS:
93-
for frame in FRAMES:
94-
# Note that we are using 0-based index naming. Asking ffmpeg to number output
95-
# frames would result in 1-based index naming. We enforce 0-based index naming
96-
# so that the name of reference frames matches the index when accessing that
97-
# frame in the Python decoder.
98-
output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp"
99-
get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=stream)
100-
convert_image_to_tensor(output_bmp)
105+
streams = [0, 3]
106+
frames = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
107+
for stream in streams:
108+
for frame in frames:
109+
generate_frame_by_index(NASA_VIDEO, frame_index=frame, stream_index=stream)
101110

102111
# Extract individual frames at specific timestamps, including the last frame of the video.
103112
seek_timestamp = [6.0, 6.1, 10.0, 12.979633]
104113
timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp]
105114
for timestamp, name in zip(seek_timestamp, timestamp_name):
106-
output_bmp = f"{VIDEO_PATH}.time{name}.bmp"
107-
get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp)
108-
convert_image_to_tensor(output_bmp)
115+
output_bmp = f"{NASA_VIDEO.path}.time{name}.bmp"
116+
generate_frame_by_timestamp(NASA_VIDEO.path, timestamp, output_bmp)
109117

110118
# Extract frames with specific filters. We have tests that assume these exact filters.
111-
FRAMES = [0, 15, 200, 389]
119+
frames = [0, 15, 200, 389]
112120
crop_filter = "crop=300:200:50:35:exact=1"
113-
for frame in FRAMES:
114-
output_bmp = f"{VIDEO_PATH}.{sanitize_filtergraph_expression(crop_filter)}.stream3.frame{frame:06d}.bmp"
115-
get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=3, filters=crop_filter)
116-
convert_image_to_tensor(output_bmp)
121+
for frame in frames:
122+
generate_frame_by_index(
123+
NASA_VIDEO, frame_index=frame, stream_index=3, filters=crop_filter
124+
)
117125

118126

119127
def generate_h265_video_references():
@@ -122,25 +130,18 @@ def generate_h265_video_references():
122130
# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
123131
# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
124132
# Note that this video only has 1 stream, at index 0.
125-
VIDEO_PATH = RESOURCES_DIR / "h265_video.mp4"
126-
FRAMES = [5]
127-
for frame in FRAMES:
128-
output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
129-
get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
130-
convert_image_to_tensor(output_bmp)
133+
frames = [5]
134+
for frame in frames:
135+
generate_frame_by_index(H265_VIDEO, frame_index=frame, stream_index=0)
131136

132137

133138
def generate_av1_video_references():
134139
# This video was generated by running the following:
135140
# ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
136141
# Note that this video only has 1 stream, at index 0.
137-
VIDEO_PATH = RESOURCES_DIR / "av1_video.mkv"
138-
FRAMES = [10]
139-
140-
for frame in FRAMES:
141-
output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
142-
get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
143-
convert_image_to_tensor(output_bmp)
142+
frames = [10]
143+
for frame in frames:
144+
generate_frame_by_index(AV1_VIDEO, frame_index=frame, stream_index=0)
144145

145146

146147
def main():

0 commit comments

Comments
 (0)