66
77import subprocess
88from pathlib import Path
9+ from typing import Optional
910
1011import numpy as np
1112
1213import torch
1314from PIL import Image
1415
15- from .utils import sanitize_filtergraph_expression
16+ from .utils import AV1_VIDEO , H265_VIDEO , NASA_VIDEO , TestVideo
1617
# Run this script to update the resources used in unit tests. The resources are all derived
# from source media already checked into the repo.
1920
20- SCRIPT_DIR = Path (__file__ ).resolve ().parent
21- TORCHCODEC_PATH = SCRIPT_DIR .parent
22- RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources"
2321
24-
25- def convert_image_to_tensor (image_path ):
22+ def convert_image_to_tensor (image_path : str ) -> None :
2623 image_path = Path (image_path )
2724 if not image_path .exists ():
2825 return
@@ -37,37 +34,56 @@ def convert_image_to_tensor(image_path):
3734 image_path .unlink ()
3835
3936
def generate_frame_by_index(
    video: TestVideo,
    *,
    frame_index: int,
    stream_index: int,
    filters: Optional[str] = None,
) -> None:
    """Extract one frame from `video` by 0-based index and store it as a reference tensor.

    Runs ffmpeg to dump the frame as a .bmp next to the source video (name derived
    from `video.get_base_path_by_index`), then converts that .bmp to a serialized
    tensor via `convert_image_to_tensor`.

    Args:
        video: the test video to extract from.
        frame_index: 0-based index of the frame to extract.
        stream_index: index of the stream within the container to read.
        filters: optional ffmpeg filter chain appended after the rgb24 conversion.

    Raises:
        subprocess.CalledProcessError: if the ffmpeg invocation fails.
    """
    # Note that we are using 0-based index naming. As a result, we are
    # generating files one-by-one, giving the actual file name that we want.
    # ffmpeg does have an option to generate multiple files for us, but it uses
    # 1-based indexing. We can't use 1-based indexing because we want to match
    # the 0-based indexing in our tests.
    base_path = video.get_base_path_by_index(
        frame_index, stream_index=stream_index, filters=filters
    )
    output_bmp = f"{base_path}.bmp"

    # Note that we have an explicit format conversion to rgb24 in our filtergraph
    # specification, which always happens BEFORE any of the filters that we receive
    # as input. We do this to ensure that the color conversion happens BEFORE the
    # filters, matching the behavior of the torchcodec filtergraph implementation.
    #
    # Not doing this would result in the color conversion happening AFTER the
    # filters, which would result in different color values for the same frame.
    filtergraph = f"select='eq(n\\,{frame_index})',format=rgb24"
    if filters is not None:
        filtergraph = filtergraph + f",{filters}"

    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        video.path,
        "-map",
        f"0:{stream_index}",
        "-vf",
        filtergraph,
        "-fps_mode",
        "passthrough",
        "-update",
        "1",
        output_bmp,
    ]
    subprocess.run(cmd, check=True)
    convert_image_to_tensor(output_bmp)


70- def get_frame_by_timestamp (video_path , timestamp , output_path ):
84+ def generate_frame_by_timestamp (
85+ video_path : str , timestamp : float , output_path : str
86+ ) -> None :
7187 cmd = [
7288 "ffmpeg" ,
7389 "-y" ,
@@ -80,40 +96,32 @@ def get_frame_by_timestamp(video_path, timestamp, output_path):
8096 output_path ,
8197 ]
8298 subprocess .run (cmd , check = True )
99+ convert_image_to_tensor (output_path )
83100
84101
def generate_nasa_13013_references():
    """Regenerate all reference tensors derived from nasa_13013.mp4.

    Covers three extraction modes: frames by index on streams 0 and 3, frames by
    timestamp (including the last frame), and frames extracted with a crop filter.
    """
    # Note: The naming scheme used here must match the naming scheme used to load
    # tensors in ./utils.py.
    streams = [0, 3]
    frames = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
    for stream in streams:
        for frame in frames:
            generate_frame_by_index(NASA_VIDEO, frame_index=frame, stream_index=stream)

    # Extract individual frames at specific timestamps, including the last frame
    # of the video.
    seek_timestamps = [6.0, 6.1, 10.0, 12.979633]
    timestamp_names = [f"{ts:06f}" for ts in seek_timestamps]
    for timestamp, name in zip(seek_timestamps, timestamp_names):
        output_bmp = f"{NASA_VIDEO.path}.time{name}.bmp"
        generate_frame_by_timestamp(NASA_VIDEO.path, timestamp, output_bmp)

    # Extract frames with specific filters. We have tests that assume these exact
    # filters.
    frames = [0, 15, 200, 389]
    crop_filter = "crop=300:200:50:35:exact=1"
    for frame in frames:
        generate_frame_by_index(
            NASA_VIDEO, frame_index=frame, stream_index=3, filters=crop_filter
        )


def generate_h265_video_references():
    """Regenerate the reference tensor for h265_video.mp4 (single stream, index 0)."""
    # The source video was generated with a locally-built libx265-enabled ffmpeg:
    # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
    # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
    # Note that this video only has 1 stream, at index 0.
    frames = [5]
    for frame in frames:
        generate_frame_by_index(H265_VIDEO, frame_index=frame, stream_index=0)


def generate_av1_video_references():
    """Regenerate the reference tensor for av1_video.mkv (single stream, index 0)."""
    # This video was generated by running the following:
    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
    # Note that this video only has 1 stream, at index 0.
    frames = [10]
    for frame in frames:
        generate_frame_by_index(AV1_VIDEO, frame_index=frame, stream_index=0)


146147def main ():
0 commit comments