Skip to content

Commit d059f09

Browse files
committed
delete padding when num_frames < temporal_patch_size
1 parent f257f08 commit d059f09

File tree

2 files changed

+0
-22
lines changed

2 files changed: +0 additions, -22 deletions

src/transformers/models/qwen3_vl/video_processing_qwen3_vl.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,6 @@ def _preprocess(
196196

197197
for shape, stacked_videos in grouped_videos.items():
198198
if do_resize:
199-
T = stacked_videos.shape[1]
200-
if pad := -T % temporal_patch_size:
201-
repeats = stacked_videos[:, -1:].expand(-1, pad, -1, -1, -1)
202-
stacked_videos = torch.cat((stacked_videos, repeats), dim=1)
203199
B, T, C, H, W = stacked_videos.shape
204200
num_frames, height, width = T, H, W
205201
resized_height, resized_width = smart_resize(

tests/models/qwen3_vl/test_video_processing_qwen3_vl.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -329,24 +329,6 @@ def test_call_sample_frames(self):
329329
if prev_max_resolution is not None:
330330
self.video_processor_tester.max_resolution = prev_max_resolution
331331

332-
def test_only_one_image_input(self):
333-
for video_processing_class in self.video_processor_list:
334-
video_processor_dict = self.video_processor_dict.copy()
335-
video_processor_dict["size"] = {"longest_edge": 1 * 32 * 32, "shortest_edge": 32 * 32}
336-
video_processor_dict["do_sample_frames"] = False
337-
video_processor_dict["temporal_patch_size"] = 3
338-
video_processing = video_processing_class(**video_processor_dict)
339-
340-
n, w, h = 1, 32, 32
341-
video_inputs = [(np.random.randint(0, 256, (h, w, 3), dtype=np.uint8)) for _ in range(n)]
342-
343-
video_processed = video_processing(video_inputs, return_tensors="pt")
344-
encoded_videos = video_processed[self.input_name]
345-
self.assertEqual(list(encoded_videos.shape), [4, 2304])
346-
347-
video_grid_thw = video_processed["video_grid_thw"]
348-
self.assertEqual(video_grid_thw.tolist(), [[1, 2, 2]])
349-
350332
def test_num_frames_equal_temporal_patch_size_plus_two(self):
351333
for video_processing_class in self.video_processor_list:
352334
video_processor_dict = self.video_processor_dict.copy()

0 commit comments

Comments (0)