@@ -17,28 +17,28 @@ def load_with_torchcodec(
1717 backend : Optional [str ] = None ,
1818) -> Tuple [torch .Tensor , int ]:
1919 """Load audio data from source using TorchCodec's AudioDecoder.
20-
20+
2121 .. note::
22-
22+
2323 This function supports the same API as :func:`~torchaudio.load`, and
2424 relies on TorchCodec's decoding capabilities under the hood. It is
2525 provided for convenience, but we do recommend that you port your code to
2626 natively use ``torchcodec``'s ``AudioDecoder`` class for better
2727 performance:
2828 https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
29- In TorchAudio 2.9, :func:`~torchaudio.load` will be relying on
29+ As of TorchAudio 2.9, :func:`~torchaudio.load` relies on
3030 :func:`~torchaudio.load_with_torchcodec`. Note that some parameters of
3131 :func:`~torchaudio.load`, like ``normalize``, ``buffer_size``, and
3232 ``backend``, are ignored by :func:`~torchaudio.load_with_torchcodec`.
33-
34-
33+
34+
3535 Args:
3636 uri (path-like object or file-like object):
3737 Source of audio data. The following types are accepted:
38-
38+
3939 * ``path-like``: File path or URL.
4040 * ``file-like``: Object with ``read(size: int) -> bytes`` method.
41-
41+
4242 frame_offset (int, optional):
4343 Number of samples to skip before starting to read data.
4444 num_frames (int, optional):
@@ -58,17 +58,17 @@ def load_with_torchcodec(
5858 Not used by TorchCodec AudioDecoder. Provided for API compatibility.
5959 backend (str or None, optional):
6060 Not used by TorchCodec AudioDecoder. Provided for API compatibility.
61-
61+
6262 Returns:
6363 (torch.Tensor, int): Resulting Tensor and sample rate.
6464 Always returns float32 tensors. If ``channels_first=True``, shape is
6565 `[channel, time]`, otherwise `[time, channel]`.
66-
66+
6767 Raises:
6868 ImportError: If torchcodec is not available.
6969 ValueError: If unsupported parameters are used.
7070 RuntimeError: If TorchCodec fails to decode the audio.
71-
71+
7272 Note:
7373 - TorchCodec always returns normalized float32 samples, so the ``normalize``
7474 parameter has no effect.
@@ -84,7 +84,7 @@ def load_with_torchcodec(
8484 "TorchCodec is required for load_with_torchcodec. "
8585 "Please install torchcodec to use this function."
8686 ) from e
87-
87+
8888 # Parameter validation and warnings
8989 if not normalize :
9090 import warnings
@@ -94,71 +94,71 @@ def load_with_torchcodec(
9494 UserWarning ,
9595 stacklevel = 2
9696 )
97-
97+
9898 if buffer_size != 4096 :
9999 import warnings
100100 warnings .warn (
101101 "The 'buffer_size' parameter is not used by TorchCodec AudioDecoder." ,
102102 UserWarning ,
103103 stacklevel = 2
104104 )
105-
105+
106106 if backend is not None :
107107 import warnings
108108 warnings .warn (
109109 "The 'backend' parameter is not used by TorchCodec AudioDecoder." ,
110110 UserWarning ,
111111 stacklevel = 2
112112 )
113-
113+
114114 if format is not None :
115115 import warnings
116116 warnings .warn (
117117 "The 'format' parameter is not supported by TorchCodec AudioDecoder." ,
118118 UserWarning ,
119119 stacklevel = 2
120120 )
121-
121+
122122 # Create AudioDecoder
123123 try :
124124 decoder = AudioDecoder (uri )
125125 except Exception as e :
126126 raise RuntimeError (f"Failed to create AudioDecoder for { uri } : { e } " ) from e
127-
127+
128128 # Get sample rate from metadata
129129 sample_rate = decoder .metadata .sample_rate
130130 if sample_rate is None :
131131 raise RuntimeError ("Unable to determine sample rate from audio metadata" )
132-
132+
133133 # Decode the entire file first, then subsample manually
134134 # This is the simplest approach since torchcodec uses time-based indexing
135135 try :
136136 audio_samples = decoder .get_all_samples ()
137137 except Exception as e :
138138 raise RuntimeError (f"Failed to decode audio samples: { e } " ) from e
139-
139+
140140 data = audio_samples .data
141-
141+
142142 # Apply frame_offset and num_frames (which are actually sample offsets)
143143 if frame_offset > 0 :
144144 if frame_offset >= data .shape [1 ]:
145145 # Return empty tensor if offset is beyond available data
146146 empty_shape = (data .shape [0 ], 0 ) if channels_first else (0 , data .shape [0 ])
147147 return torch .zeros (empty_shape , dtype = torch .float32 ), sample_rate
148148 data = data [:, frame_offset :]
149-
149+
150150 if num_frames == 0 :
151151 # Return empty tensor if num_frames is 0
152152 empty_shape = (data .shape [0 ], 0 ) if channels_first else (0 , data .shape [0 ])
153153 return torch .zeros (empty_shape , dtype = torch .float32 ), sample_rate
154154 elif num_frames > 0 :
155155 data = data [:, :num_frames ]
156-
156+
157157 # TorchCodec returns data in [channel, time] format by default
158158 # Handle channels_first parameter
159159 if not channels_first :
160160 data = data .transpose (0 , 1 ) # [channel, time] -> [time, channel]
161-
161+
162162 return data , sample_rate
163163
164164
@@ -177,70 +177,70 @@ def save_with_torchcodec(
177177 """Save audio data to file using TorchCodec's AudioEncoder.
178178
179179 .. note::
180-
180+
181181 This function supports the same API as :func:`~torchaudio.save`, and
182182 relies on TorchCodec's encoding capabilities under the hood. It is
183183 provided for convenience, but we do recommend that you port your code to
184184 natively use ``torchcodec``'s ``AudioEncoder`` class for better
185185 performance:
186186 https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder.
187- In TorchAudio 2.9, :func:`~torchaudio.save` will be relying on
187+ As of TorchAudio 2.9, :func:`~torchaudio.save` relies on
188188 :func:`~torchaudio.save_with_torchcodec`. Note that some parameters of
189189 :func:`~torchaudio.save`, like ``format``, ``encoding``,
190190 ``bits_per_sample``, ``buffer_size``, and ``backend``, are ignored by
191191 :func:`~torchaudio.save_with_torchcodec`.
192-
192+
193193 This function provides a TorchCodec-based alternative to torchaudio.save
194194 with the same API. TorchCodec's AudioEncoder provides efficient encoding
195195 with FFmpeg under the hood.
196-
196+
197197 Args:
198198 uri (path-like object):
199199 Path to save the audio file. The file extension determines the format.
200-
200+
201201 src (torch.Tensor):
202202 Audio data to save. Must be a 1D or 2D tensor with float32 values
203203 in the range [-1, 1]. If 2D, shape should be [channel, time] when
204204 channels_first=True, or [time, channel] when channels_first=False.
205-
205+
206206 sample_rate (int):
207207 Sample rate of the audio data.
208-
208+
209209 channels_first (bool, optional):
210210 Indicates whether the input tensor has channels as the first dimension.
211211 If True, expects [channel, time]. If False, expects [time, channel].
212212 Default: True.
213-
213+
214214 format (str or None, optional):
215215 Audio format hint. Not used by TorchCodec (format is determined by
216216 file extension). A warning is issued if provided.
217217 Default: None.
218-
218+
219219 encoding (str or None, optional):
220220 Audio encoding. Not fully supported by TorchCodec AudioEncoder.
221221 A warning is issued if provided. Default: None.
222-
222+
223223 bits_per_sample (int or None, optional):
224224 Bits per sample. Not directly supported by TorchCodec AudioEncoder.
225225 A warning is issued if provided. Default: None.
226-
226+
227227 buffer_size (int, optional):
228228 Not used by TorchCodec AudioEncoder. Provided for API compatibility.
229229 A warning is issued if not default value. Default: 4096.
230-
230+
231231 backend (str or None, optional):
232232 Not used by TorchCodec AudioEncoder. Provided for API compatibility.
233233 A warning is issued if provided. Default: None.
234-
234+
235235 compression (float, int or None, optional):
236236 Compression level or bit rate. Maps to bit_rate parameter in
237237 TorchCodec AudioEncoder. Default: None.
238-
238+
239239 Raises:
240240 ImportError: If torchcodec is not available.
241241 ValueError: If input parameters are invalid.
242242 RuntimeError: If TorchCodec fails to encode the audio.
243-
243+
244244 Note:
245245 - TorchCodec AudioEncoder expects float32 samples in [-1, 1] range.
246246 - Some parameters (format, encoding, bits_per_sample, buffer_size, backend)
@@ -256,7 +256,7 @@ def save_with_torchcodec(
256256 "TorchCodec is required for save_with_torchcodec. "
257257 "Please install torchcodec to use this function."
258258 ) from e
259-
259+
260260 # Parameter validation and warnings
261261 if format is not None :
262262 import warnings
@@ -266,49 +266,49 @@ def save_with_torchcodec(
266266 UserWarning ,
267267 stacklevel = 2
268268 )
269-
269+
270270 if encoding is not None :
271271 import warnings
272272 warnings .warn (
273273 "The 'encoding' parameter is not fully supported by TorchCodec AudioEncoder." ,
274274 UserWarning ,
275275 stacklevel = 2
276276 )
277-
277+
278278 if bits_per_sample is not None :
279279 import warnings
280280 warnings .warn (
281281 "The 'bits_per_sample' parameter is not directly supported by TorchCodec AudioEncoder." ,
282282 UserWarning ,
283283 stacklevel = 2
284284 )
285-
285+
286286 if buffer_size != 4096 :
287287 import warnings
288288 warnings .warn (
289289 "The 'buffer_size' parameter is not used by TorchCodec AudioEncoder." ,
290290 UserWarning ,
291291 stacklevel = 2
292292 )
293-
293+
294294 if backend is not None :
295295 import warnings
296296 warnings .warn (
297297 "The 'backend' parameter is not used by TorchCodec AudioEncoder." ,
298298 UserWarning ,
299299 stacklevel = 2
300300 )
301-
301+
302302 # Input validation
303303 if not isinstance (src , torch .Tensor ):
304304 raise ValueError (f"Expected src to be a torch.Tensor, got { type (src )} " )
305-
305+
306306 if src .dtype != torch .float32 :
307307 src = src .float ()
308-
308+
309309 if sample_rate <= 0 :
310310 raise ValueError (f"sample_rate must be positive, got { sample_rate } " )
311-
311+
312312 # Handle tensor shape and channels_first
313313 if src .ndim == 1 :
314314 # Convert to 2D: [1, time] for channels_first=True
@@ -324,13 +324,13 @@ def save_with_torchcodec(
324324 data = src .transpose (0 , 1 ) # [time, channel] -> [channel, time]
325325 else :
326326 raise ValueError (f"Expected 1D or 2D tensor, got { src .ndim } D tensor" )
327-
327+
328328 # Create AudioEncoder
329329 try :
330330 encoder = AudioEncoder (data , sample_rate = sample_rate )
331331 except Exception as e :
332332 raise RuntimeError (f"Failed to create AudioEncoder: { e } " ) from e
333-
333+
334334 # Determine bit_rate from compression parameter
335335 bit_rate = None
336336 if compression is not None :
@@ -344,7 +344,7 @@ def save_with_torchcodec(
344344 UserWarning ,
345345 stacklevel = 2
346346 )
347-
347+
348348 # Save to file
349349 try :
350350 encoder .to_file (uri , bit_rate = bit_rate )
0 commit comments