Skip to content

Commit 901e5d5

Browse files
authored
Fix bug when downsampling by a great factor (empty output frame) (#586)
1 parent 4ee3db5 commit 901e5d5

File tree

5 files changed

+2057
-1
lines changed

5 files changed

+2057
-1
lines changed

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1481,8 +1481,11 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14811481
static_cast<const uint8_t**>(
14821482
const_cast<const uint8_t**>(srcAVFrame->data)),
14831483
srcAVFrame->nb_samples);
1484+
// numConvertedSamples can be 0 if we're downsampling by a great factor and
1485+
// the first frame doesn't contain a lot of samples. It should be handled
1486+
// properly by the caller.
14841487
TORCH_CHECK(
1485-
numConvertedSamples > 0,
1488+
numConvertedSamples >= 0,
14861489
"Error in swr_convert: ",
14871490
getFFMPEGErrorStringFromErrorCode(numConvertedSamples));
14881491

test/decoders/test_decoders.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
in_fbcode,
2525
NASA_AUDIO,
2626
NASA_AUDIO_MP3,
27+
NASA_AUDIO_MP3_44100,
2728
NASA_VIDEO,
2829
SINE_MONO_S16,
2930
SINE_MONO_S32,
@@ -1165,6 +1166,35 @@ def test_sample_rate_conversion_stereo(self):
11651166
decoder = AudioDecoder(asset.path, sample_rate=44_100)
11661167
decoder.get_samples_played_in_range(start_seconds=0)
11671168

1169+
def test_downsample_empty_frame(self):
1170+
# Non-regression test for
1171+
# https://github.com/pytorch/torchcodec/pull/586: when downsampling by
1172+
# a great factor, if an input frame has a small number of samples, the
1173+
# resampled frame (as output by swresample) may contain zero samples. We
1174+
# make sure we handle this properly.
1175+
#
1176+
# NASA_AUDIO_MP3_44100's first frame has only 47 samples which triggers
1177+
# the test scenario:
1178+
# ```
1179+
# » ffprobe -v error -hide_banner -select_streams a:0 -show_frames -of json test/resources/nasa_13013.mp4.audio_44100.mp3 | grep nb_samples | head -n 3
1180+
# "nb_samples": 47,
1181+
# "nb_samples": 1152,
1182+
# "nb_samples": 1152,
1183+
# ```
1184+
asset = NASA_AUDIO_MP3_44100
1185+
assert asset.sample_rate == 44_100
1186+
decoder = AudioDecoder(asset.path, sample_rate=8_000)
1187+
frames_44100_to_8000 = decoder.get_samples_played_in_range(start_seconds=0)
1188+
1189+
# Just checking correctness now
1190+
asset = NASA_AUDIO_MP3
1191+
assert asset.sample_rate == 8_000
1192+
decoder = AudioDecoder(asset.path)
1193+
frames_8000 = decoder.get_samples_played_in_range(start_seconds=0)
1194+
torch.testing.assert_close(
1195+
frames_44100_to_8000.data, frames_8000.data, atol=0.03, rtol=0
1196+
)
1197+
11681198
def test_s16_ffmpeg4_bug(self):
11691199
# s16 fails on FFmpeg4 but can be decoded on other versions.
11701200
# Debugging logs show that we're hitting:
205 KB
Binary file not shown.

0 commit comments

Comments
 (0)