Skip to content

Commit 6e87700

Browse files
committed
Fix mel-spectrogram computation
1 parent f29c594 commit 6e87700

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

audio.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ def inv_spectrogram(spectrogram):
4545

4646
def melspectrogram(y):
4747
D = _lws_processor().stft(preemphasis(y)).T
48-
S = _amp_to_db(_linear_to_mel(np.abs(D)))
48+
S = _amp_to_db(_linear_to_mel(np.abs(D))) - hparams.ref_level_db
49+
if not hparams.allow_clipping_in_normalization:
50+
assert S.max() <= 0 and S.min() - hparams.min_level_db >= 0
4951
return _normalize(S)
5052

5153

hparams.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@
116116
preemphasis=0.97,
117117
min_level_db=-100,
118118
ref_level_db=20,
119+
# mel-spectrogram is normalized to [0, 1] for each utterance and clipping may
120+
# happen depending on min_level_db and ref_level_db, causing clipping noise.
121+
# If False, an assertion is added to ensure no clipping happens.
122+
allow_clipping_in_normalization=False,
119123

120124
# Model:
121125
downsample_step=4, # must be 4 when builder="nyanko"

0 commit comments

Comments
 (0)