Remove augmentation effector (#4002)

samanklesaria · web-flow · commit 3187fcb65d5e · 2025-07-31T15:20:07.000-05:00
* Remove effector from data augmentation tutorial

* Remove phone recording section

---------

Co-authored-by: Sam Anklesaria <sanklesaria@openteams.com>
diff --git a/examples/tutorials/audio_data_augmentation_tutorial.py b/examples/tutorials/audio_data_augmentation_tutorial.py
@@ -40,46 +40,16 @@
 
 
 ######################################################################
-# Applying effects and filtering
+# Loading the data
 # ------------------------------
 #
-# :py:class:`torchaudio.io.AudioEffector` allows for directly applying
-# filters and codecs to Tensor objects, in a similar way as ``ffmpeg``
-# command
-#
-# `AudioEffector Usages <./effector_tutorial.html>` explains how to use
-# this class, so for the detail, please refer to the tutorial.
-#
 
-# Load the data
 waveform1, sample_rate = torchaudio.load(SAMPLE_WAV, channels_first=False)
 
-# Define effects
-effect = ",".join(
-    [
-        "lowpass=frequency=300:poles=1",  # apply single-pole lowpass filter
-        "atempo=0.8",  # reduce the speed
-        "aecho=in_gain=0.8:out_gain=0.9:delays=200:decays=0.3|delays=400:decays=0.3"
-        # Applying echo gives some dramatic feeling
-    ],
-)
-
-
-# Apply effects
-def apply_effect(waveform, sample_rate, effect):
-    effector = torchaudio.io.AudioEffector(effect=effect)
-    return effector.apply(waveform, sample_rate)
-
-
-waveform2 = apply_effect(waveform1, sample_rate, effect)
-
 print(waveform1.shape, sample_rate)
-print(waveform2.shape, sample_rate)
 
 ######################################################################
-# Note that the number of frames and number of channels are different from
-# those of the original after the effects are applied. Let’s listen to the
-# audio.
+# Let’s listen to the audio.
 #
 
 
@@ -124,24 +94,11 @@ def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
 
 
 ######################################################################
-# Original
-# ~~~~~~~~
-#
 
 plot_waveform(waveform1.T, sample_rate, title="Original", xlim=(-0.1, 3.2))
 plot_specgram(waveform1.T, sample_rate, title="Original", xlim=(0, 3.04))
 Audio(waveform1.T, rate=sample_rate)
 
-######################################################################
-# Effects applied
-# ~~~~~~~~~~~~~~~
-#
-
-plot_waveform(waveform2.T, sample_rate, title="Effects Applied", xlim=(-0.1, 3.2))
-plot_specgram(waveform2.T, sample_rate, title="Effects Applied", xlim=(0, 3.04))
-Audio(waveform2.T, rate=sample_rate)
-
-
 ######################################################################
 # Simulating room reverberation
 # -----------------------------
@@ -265,143 +222,3 @@ def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
 plot_waveform(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]")
 plot_specgram(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]")
 Audio(noisy_speech, rate=sample_rate)
-
-
-######################################################################
-# Applying codec to Tensor object
-# -------------------------------
-#
-# :py:class:`torchaudio.io.AudioEffector` can also apply codecs to
-# a Tensor object.
-#
-
-waveform, sample_rate = torchaudio.load(SAMPLE_SPEECH, channels_first=False)
-
-
-def apply_codec(waveform, sample_rate, format, encoder=None):
-    encoder = torchaudio.io.AudioEffector(format=format, encoder=encoder)
-    return encoder.apply(waveform, sample_rate)
-
-
-######################################################################
-# Original
-# ~~~~~~~~
-#
-
-plot_waveform(waveform.T, sample_rate, title="Original")
-plot_specgram(waveform.T, sample_rate, title="Original")
-Audio(waveform.T, rate=sample_rate)
-
-######################################################################
-# 8 bit mu-law
-# ~~~~~~~~~~~~
-#
-
-mulaw = apply_codec(waveform, sample_rate, "wav", encoder="pcm_mulaw")
-plot_waveform(mulaw.T, sample_rate, title="8 bit mu-law")
-plot_specgram(mulaw.T, sample_rate, title="8 bit mu-law")
-Audio(mulaw.T, rate=sample_rate)
-
-######################################################################
-# G.722
-# ~~~~~
-#
-
-g722 = apply_codec(waveform, sample_rate, "g722")
-plot_waveform(g722.T, sample_rate, title="G.722")
-plot_specgram(g722.T, sample_rate, title="G.722")
-Audio(g722.T, rate=sample_rate)
-
-######################################################################
-# Vorbis
-# ~~~~~~
-#
-
-vorbis = apply_codec(waveform, sample_rate, "ogg", encoder="vorbis")
-plot_waveform(vorbis.T, sample_rate, title="Vorbis")
-plot_specgram(vorbis.T, sample_rate, title="Vorbis")
-Audio(vorbis.T, rate=sample_rate)
-
-######################################################################
-# Simulating a phone recoding
-# ---------------------------
-#
-# Combining the previous techniques, we can simulate audio that sounds
-# like a person talking over a phone in a echoey room with people talking
-# in the background.
-#
-
-sample_rate = 16000
-original_speech, sample_rate = torchaudio.load(SAMPLE_SPEECH)
-
-plot_specgram(original_speech, sample_rate, title="Original")
-
-# Apply RIR
-rir_applied = F.fftconvolve(speech, rir)
-
-plot_specgram(rir_applied, sample_rate, title="RIR Applied")
-
-# Add background noise
-# Because the noise is recorded in the actual environment, we consider that
-# the noise contains the acoustic feature of the environment. Therefore, we add
-# the noise after RIR application.
-noise, _ = torchaudio.load(SAMPLE_NOISE)
-noise = noise[:, : rir_applied.shape[1]]
-
-snr_db = torch.tensor([8])
-bg_added = F.add_noise(rir_applied, noise, snr_db)
-
-plot_specgram(bg_added, sample_rate, title="BG noise added")
-
-# Apply filtering and change sample rate
-effect = ",".join(
-    [
-        "lowpass=frequency=4000:poles=1",
-        "compand=attacks=0.02:decays=0.05:points=-60/-60|-30/-10|-20/-8|-5/-8|-2/-8:gain=-8:volume=-7:delay=0.05",
-    ]
-)
-
-filtered = apply_effect(bg_added.T, sample_rate, effect)
-sample_rate2 = 8000
-
-plot_specgram(filtered.T, sample_rate2, title="Filtered")
-
-# Apply telephony codec
-codec_applied = apply_codec(filtered, sample_rate2, "g722")
-plot_specgram(codec_applied.T, sample_rate2, title="G.722 Codec Applied")
-
-
-######################################################################
-# Original speech
-# ~~~~~~~~~~~~~~~
-#
-
-Audio(original_speech, rate=sample_rate)
-
-######################################################################
-# RIR applied
-# ~~~~~~~~~~~
-#
-
-Audio(rir_applied, rate=sample_rate)
-
-######################################################################
-# Background noise added
-# ~~~~~~~~~~~~~~~~~~~~~~
-#
-
-Audio(bg_added, rate=sample_rate)
-
-######################################################################
-# Filtered
-# ~~~~~~~~
-#
-
-Audio(filtered.T, rate=sample_rate2)
-
-######################################################################
-# Codec applied
-# ~~~~~~~~~~~~~
-#
-
-Audio(codec_applied.T, rate=sample_rate2)