|
45 | 45 | ###################################################################### |
46 | 46 | # Preparation |
47 | 47 | # ----------- |
48 | | -# |
49 | | -# .. note:: |
50 | | -# |
51 | | -# When running this tutorial in Google Colab, install the required packages |
52 | | -# |
53 | | -# .. code:: |
54 | | -# |
55 | | -# !pip install librosa |
56 | | -# |
| 48 | + |
57 | 49 | from IPython.display import Audio |
58 | 50 | from matplotlib.patches import Rectangle |
59 | 51 | from torchaudio.utils import download_asset |
@@ -284,31 +276,6 @@ def plot_fbank(fbank, title=None): |
284 | 276 |
|
285 | 277 | plot_fbank(mel_filters, "Mel Filter Bank - torchaudio") |
286 | 278 |
|
287 | | -###################################################################### |
288 | | -# Comparison against librosa |
289 | | -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
290 | | -# |
291 | | -# For reference, here is the equivalent way to get the mel filter bank |
292 | | -# with ``librosa``. |
293 | | -# |
294 | | - |
295 | | -mel_filters_librosa = librosa.filters.mel( |
296 | | - sr=sample_rate, |
297 | | - n_fft=n_fft, |
298 | | - n_mels=n_mels, |
299 | | - fmin=0.0, |
300 | | - fmax=sample_rate / 2.0, |
301 | | - norm="slaney", |
302 | | - htk=True, |
303 | | -).T |
304 | | - |
305 | | -###################################################################### |
306 | | -# |
307 | | - |
308 | | -plot_fbank(mel_filters_librosa, "Mel Filter Bank - librosa") |
309 | | - |
310 | | -mse = torch.square(mel_filters - mel_filters_librosa).mean().item() |
311 | | -print("Mean Square Difference: ", mse) |
312 | 279 |
|
313 | 280 | ###################################################################### |
314 | 281 | # MelSpectrogram |
@@ -345,35 +312,6 @@ def plot_fbank(fbank, title=None): |
345 | 312 |
|
346 | 313 | plot_spectrogram(melspec[0], title="MelSpectrogram - torchaudio", ylabel="mel freq") |
347 | 314 |
|
348 | | -###################################################################### |
349 | | -# Comparison against librosa |
350 | | -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
351 | | -# |
352 | | -# For reference, here is the equivalent means of generating mel-scale |
353 | | -# spectrograms with ``librosa``. |
354 | | -# |
355 | | - |
356 | | -melspec_librosa = librosa.feature.melspectrogram( |
357 | | - y=SPEECH_WAVEFORM.numpy()[0], |
358 | | - sr=sample_rate, |
359 | | - n_fft=n_fft, |
360 | | - hop_length=hop_length, |
361 | | - win_length=win_length, |
362 | | - center=True, |
363 | | - pad_mode="reflect", |
364 | | - power=2.0, |
365 | | - n_mels=n_mels, |
366 | | - norm="slaney", |
367 | | - htk=True, |
368 | | -) |
369 | | - |
370 | | -###################################################################### |
371 | | -# |
372 | | - |
373 | | -plot_spectrogram(melspec_librosa, title="MelSpectrogram - librosa", ylabel="mel freq") |
374 | | - |
375 | | -mse = torch.square(melspec - melspec_librosa).mean().item() |
376 | | -print("Mean Square Difference: ", mse) |
377 | 315 |
|
378 | 316 | ###################################################################### |
379 | 317 | # MFCC |
@@ -404,37 +342,6 @@ def plot_fbank(fbank, title=None): |
404 | 342 |
|
405 | 343 | plot_spectrogram(mfcc[0], title="MFCC") |
406 | 344 |
|
407 | | -###################################################################### |
408 | | -# Comparison against librosa |
409 | | -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
410 | | -# |
411 | | - |
412 | | -melspec = librosa.feature.melspectrogram( |
413 | | - y=SPEECH_WAVEFORM.numpy()[0], |
414 | | - sr=sample_rate, |
415 | | - n_fft=n_fft, |
416 | | - win_length=win_length, |
417 | | - hop_length=hop_length, |
418 | | - n_mels=n_mels, |
419 | | - htk=True, |
420 | | - norm=None, |
421 | | -) |
422 | | - |
423 | | -mfcc_librosa = librosa.feature.mfcc( |
424 | | - S=librosa.core.spectrum.power_to_db(melspec), |
425 | | - n_mfcc=n_mfcc, |
426 | | - dct_type=2, |
427 | | - norm="ortho", |
428 | | -) |
429 | | - |
430 | | -###################################################################### |
431 | | -# |
432 | | - |
433 | | -plot_spectrogram(mfcc_librosa, title="MFCC (librosa)") |
434 | | - |
435 | | -mse = torch.square(mfcc - mfcc_librosa).mean().item() |
436 | | -print("Mean Square Difference: ", mse) |
437 | | - |
438 | 345 | ###################################################################### |
439 | 346 | # LFCC |
440 | 347 | # ---- |
|
0 commit comments