Skip to content

Commit 6959f08

Browse files
author
Schuler Henry Martin (BhP/HRL3.2-SH1)
committed
Added pre-processing (partly from studienarbeit)
1 parent 726a9a5 commit 6959f08

File tree

2 files changed

+222
-12
lines changed

2 files changed

+222
-12
lines changed

playground/windowing/main.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ def test7(src, frame_time, start_time):
619619
f_time = np.linspace(0, 1000*frame_time, samples)
620620

621621
fig, axs = plt.subplots(20)
622-
fig_all, axs_all = plt.subplots(1)
622+
# fig_all, axs_all = plt.subplots(1)
623623

624624
max_value = np.zeros(20)
625625
sum_value = np.zeros(20)
@@ -630,10 +630,12 @@ def test7(src, frame_time, start_time):
630630

631631
axs[i].plot(f_time, y[start_sample:end_sample])
632632

633-
lpc_values = librosa.lpc(y[start_sample:end_sample], order=12)
633+
lpc_values = librosa.lpc(y[start_sample:end_sample], order=12) # a1 bis a12 sind die lpc werte; a0 ist standardmäßig 1
634+
print(lpc_values)
634635
axs2[i].plot(np.linspace(0, 13, 13), lpc_values, label="test")
635-
axs2[i].plot(np.linspace(0, 13, 13), lpc_to_lpcc(lpc_values), label="lpcc")
636-
axs_all.plot(np.linspace(0, 13, 13), lpc_values, label=f"Round: {i}")
636+
axs3[i].plot(np.linspace(0, 13, 13), lpc_to_lpcc(lpc_values), label="lpcc")
637+
# axs4[i].plot(np.linspace(0, 13, 13), lpcc_to_wlpcc(lpc_to_lpcc(lpc_values)), label="wlpcc")
638+
# axs_all.plot(np.linspace(0, 13, 13), lpc_values, label=f"Round: {i}")
637639

638640
if max_value[i] < np.abs(np.max(lpc_values)):
639641
max_value[i] = np.abs(np.max(lpc_values))
@@ -646,11 +648,11 @@ def test7(src, frame_time, start_time):
646648
start_sample += samples
647649
end_sample+= samples
648650

649-
fig_overall, axs_overall = plt.subplots(1)
650-
axs_overall.plot(np.linspace(0, 20, 20), max_value, label="max")
651-
axs_center.plot(np.linspace(0, 20, 20), center, label=f"center {frame_time}")
652-
axs_center.legend()
653-
axs_overall.legend()
651+
# fig_overall, axs_overall = plt.subplots(1)
652+
# axs_overall.plot(np.linspace(0, 20, 20), max_value, label="max")
653+
# axs_center.plot(np.linspace(0, 20, 20), center, label=f"center {frame_time}")
654+
# axs_center.legend()
655+
# axs_overall.legend()
654656

655657
plt.ylim(-1000, 1000)
656658

@@ -662,24 +664,45 @@ def plotAudio(src):
662664

663665
def lpc_to_lpcc(lpc):
    """
    Convert LPC coefficients into LPC cepstral coefficients (LPCC).

    The input is read with 1-based indexing: ``lpc[0]`` is used as the first
    coefficient, ``lpc[1]`` as the second, and so on. The first output value
    is copied from the input and every later value is built recursively from
    the values computed before it.

    Parameters
    ----------
    lpc : np.array
        LPC coefficients.

    Returns
    -------
    result : np.array
        Array with the same shape as ``lpc``.
    """
    # NOTE(review): if `lpc` comes straight from librosa.lpc, element 0 is the
    # constant leading 1 and not a predictor coefficient. An alternative that
    # starts the recursion at lpc[1] was sketched here before -- confirm which
    # indexing is intended.
    count = lpc.shape[0]
    cepstrum = np.zeros(lpc.shape)
    cepstrum[0] = lpc[0]

    for n in range(2, count + 1):
        acc = 0
        for k in range(1, n):
            acc += (1 - k / n) * lpc[k - 1] * cepstrum[n - k - 1]
        cepstrum[n - 1] = acc + lpc[n - 1]

    return cepstrum
674+
675+
def lpcc_to_wlpcc(lpcc):
    """
    Apply a raised-sine weighting to LPCC coefficients.

    Coefficient ``m`` (for ``1 <= m <= Q`` with ``Q = len(lpcc) - 1``) is
    multiplied by ``1 + (Q / 2) * sin(pi * m / Q)``. Index 0 of the result is
    left at zero.

    Parameters
    ----------
    lpcc : np.array
        LPCC coefficients.

    Returns
    -------
    result : np.array
        Weighted coefficients, same shape as ``lpcc``.
    """
    Q = lpcc.shape[0] - 1
    weighted = np.zeros(lpcc.shape)

    for m in range(1, Q + 1):
        lifter = 1 + (Q / 2) * np.sin((np.pi * m) / Q)
        weighted[m] = lifter * lpcc[m]

    return weighted
686+
687+
669688
# test2()
670689
# test3()
671690
# burg_marple(4, [1.0,2.0,3.0, 4.0])
672691

673692
# plotAudio("C:\\Users\\SCU8BH\\Documents\\T3000\\Hallo, das ist ein Test.wav")
674693
# plotAudio("C:\\Users\\SCU8BH\\Documents\\T3000\\Account einloggen.wav")
675694

676-
fig_x, axs_center = plt.subplots(1)
695+
# fig_x, axs_center = plt.subplots(1)
677696
fig2, axs2 = plt.subplots(20)
697+
fig3, axs3 = plt.subplots(20)
698+
fig4, axs4 = plt.subplots(20)
678699
test7("C:\\Users\\SCU8BH\\Documents\\T3000\\Hallo, das ist ein Test.wav", 0.1, 1.44)
679700
test7("C:\\Users\\SCU8BH\\Documents\\T3000\\Hallo, das ist ein Test.wav", 0.05, 1.44)
680701
test7("C:\\Users\\SCU8BH\\Documents\\T3000\\Hallo, das ist ein Test.wav", 0.01, 1.44)
681-
fig_x, axs_center = plt.subplots(1)
702+
# fig_x, axs_center = plt.subplots(1)
682703
fig2, axs2 = plt.subplots(20)
704+
fig3, axs3 = plt.subplots(20)
705+
fig4, axs4 = plt.subplots(20)
683706
test7("C:\\Users\\SCU8BH\\Documents\\T3000\\Account einloggen.wav", 0.1, 0.89)
684707
test7("C:\\Users\\SCU8BH\\Documents\\T3000\\Account einloggen.wav", 0.05, 0.89)
685708
test7("C:\\Users\\SCU8BH\\Documents\\T3000\\Account einloggen.wav", 0.01, 0.89)
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
import librosa
4+
import librosa.display
5+
import soundfile as sf
6+
import scipy
7+
import os
8+
import math
9+
import random
10+
import pandas as pd
11+
from tabulate import tabulate
12+
from pydub import AudioSegment
13+
from pydub.silence import split_on_silence
14+
import noisereduce as nr
15+
16+
class AudioPreprocessor:
    """Stateless helpers that prepare a 1-D audio signal for feature extraction."""

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Convert an integer sample array to floats normalised to [-1, 1].

        Parameters
        ----------
        array : np.array
            Input samples.
        type : numpy float type
            Target dtype (the name shadows the builtin but is kept because it
            is part of the public signature).

        Returns
        -------
        result : np.array
            ``array`` itself if it already has dtype ``type`` or any other
            float dtype; otherwise a new array of dtype ``type`` divided by
            its largest absolute value (all zeros stay all zeros).
        """
        if array.dtype == type:
            return array

        if array.dtype in (np.float16, np.float32, np.float64):
            # Float input of a different precision is passed through untouched.
            return array

        # Convert before taking the absolute value: np.abs on a signed integer
        # array overflows for the most negative value (e.g. int16 -32768),
        # which would corrupt the peak estimate.
        converted = array.astype(type)
        if converted.size == 0:
            return converted

        peak = np.max(np.abs(converted))
        if peak == 0:
            return converted
        return converted / peak

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Convert a float sample array to integers scaled to the integer range.

        Parameters
        ----------
        array : np.array
            Input samples.
        type : numpy integer type
            Target dtype.
        divide_max_abs : bool
            If True, normalise by the largest absolute value before scaling so
            that the peak maps to ``np.iinfo(type).max``. If False, the samples
            are multiplied by ``np.iinfo(type).max`` directly.

        Returns
        -------
        result : np.array
            ``array`` itself if it already has dtype ``type`` or is int16,
            int32 or int64; otherwise a new array of dtype ``type``. The input
            array is never modified.
        """
        if array.dtype == type:
            return array

        if array.dtype in (np.int16, np.int32, np.int64):
            # Integer input of a different width is passed through untouched.
            return array

        if array.size == 0:
            return array.astype(type)

        limits = np.iinfo(type)
        peak = np.max(np.abs(array))
        if divide_max_abs and peak != 0:
            scaled = array / peak * limits.max
        else:
            scaled = array * limits.max

        # Casting out-of-range floats to an integer type wraps around, so
        # saturate first. In-range values are unaffected.
        return np.clip(scaled, limits.min, limits.max).astype(type)

    @staticmethod
    def remove_silence(y, threshold=0.0005, min_silence_samples=800, keep_samples=200):
        """
        Shorten long quiet stretches of a signal.

        A quiet run is a maximal sequence of consecutive samples whose absolute
        value is below ``threshold``. For every run longer than
        ``min_silence_samples`` the interior is deleted, leaving
        ``keep_samples`` samples at each end of the run.

        Parameters
        ----------
        y : np.array
            1-D signal.
        threshold : float
            Amplitude below which a sample counts as quiet.
        min_silence_samples : int
            Run length (in samples, not milliseconds) that must be exceeded
            before a run is shortened.
        keep_samples : int
            Number of quiet samples preserved at the start and end of each
            shortened run.

        Returns
        -------
        result : np.array
            Copy of ``y`` with the selected samples removed.
        """
        samples = np.asarray(y)
        quiet = np.abs(samples) < threshold

        # Pad with False on both sides so runs touching either end of the
        # signal still produce a rising and a falling edge.
        padded = np.concatenate(([False], quiet, [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_stops = np.flatnonzero(edges == -1)  # exclusive end of each run

        keep = np.ones(samples.shape[0], dtype=bool)
        for start, stop in zip(run_starts, run_stops):
            if stop - start <= min_silence_samples:
                continue
            cut_from = start + keep_samples
            cut_to = stop - keep_samples
            # Guard against a negative or inverted range, which a slice would
            # otherwise interpret relative to the end of the array.
            if cut_to > cut_from:
                keep[cut_from:cut_to] = False

        return samples[keep]

    @staticmethod
    def clip(y, sr, seconds):
        """
        Return the first ``seconds`` seconds of a signal.

        Parameters
        ----------
        y : np.array
            Signal.
        sr : int
            Sample rate in samples per second.
        seconds : int or float
            Duration to keep; fractional values are truncated to whole samples.

        Returns
        -------
        result : np.array
            ``y[0:n]`` with ``n = int(seconds * sr)`` (never negative).
        """
        # int() allows fractional durations; a float slice bound would raise.
        n_samples = max(0, int(seconds * sr))
        return y[0:n_samples]

    @staticmethod
    def framing(y, sr, frame_size, overlap):
        """
        Split a signal into equally sized, optionally overlapping frames.

        Parameters
        ----------
        y : np.array
            Signal.
        sr : int
            Sample rate; accepted for interface symmetry but not used.
        frame_size : int
            Number of samples per frame.
        overlap : int
            Number of samples shared by consecutive frames.

        Returns
        -------
        result : list of np.array
            All complete frames, including one that ends exactly at the last
            sample. Empty if ``frame_size <= 0``, ``overlap < 0`` or
            ``overlap >= frame_size``.
        """
        if overlap >= frame_size or frame_size <= 0 or overlap < 0:
            return []

        hop = frame_size - overlap
        last_start = y.shape[0] - frame_size
        # `last_start + 1` makes the bound inclusive, so a frame that fits
        # exactly at the end of the signal is not dropped.
        return [y[start:start + frame_size] for start in range(0, last_start + 1, hop)]

    @staticmethod
    def windowing(frames, window):
        """
        Multiply every frame by a window function.

        Parameters
        ----------
        frames : iterable of np.array
            Frames to weight.
        window : callable
            Called with the frame length; must return the window values
            (``np.hanning`` has this form).

        Returns
        -------
        result : list of np.array
            One windowed frame per input frame.
        """
        return [frame * window(frame.shape[0]) for frame in frames]
117+
118+
class LPCCProcessor:
    """Helpers that turn windowed frames into LPC and LPC-cepstral coefficients."""

    @staticmethod
    def lpc(frames, order=12):
        """
        Compute LPC coefficients for every frame.

        Parameters
        ----------
        frames : iterable of np.array
            Frames to analyse.
        order : int
            LPC order passed to ``librosa.lpc``.

        Returns
        -------
        result : list of np.array
            One coefficient array per frame, as returned by ``librosa.lpc``.
        """
        # `order` is passed by keyword: it is keyword-only in current librosa
        # releases, and the keyword form also works with older ones.
        return [librosa.lpc(frame, order=order) for frame in frames]

    @staticmethod
    def lpcc(lpcs, order=12):
        """
        Convert LPC coefficient arrays into LPC cepstral coefficients.

        For an input ``a`` with ``p = len(a) - 1`` the output ``c`` has
        ``order + 1`` entries::

            c[0] = a[0]
            c[n] = a[n] + sum_{k=1}^{n-1} (1 - k/n) * a[k] * c[n-k]    1 <= n <= p
            c[n] =        sum_{k=1}^{p}   (1 - k/n) * a[k] * c[n-k]    n > p

        Parameters
        ----------
        lpcs : iterable of np.array
            LPC coefficient arrays; index 0 is copied to the output unchanged.
        order : int
            Highest cepstral index to compute; may exceed the LPC order.

        Returns
        -------
        result : list of np.array
            One array of length ``order + 1`` per input array.
        """
        # NOTE(review): the coefficients are used exactly as given, with no
        # sign change. Confirm this matches the sign convention of the LPC
        # source (librosa.lpc returns prediction-error filter coefficients).
        lpcc_coefficients = []
        for lpc in lpcs:
            p = lpc.shape[0] - 1  # highest valid index into `lpc`
            lpcc = np.zeros(order + 1)
            lpcc[0] = lpc[0]
            for n in range(1, order + 1):
                # Limit k to the coefficients that exist, so n > p neither
                # reads past the end of `lpc` nor skips valid terms.
                value = sum(
                    (1 - k / n) * lpc[k] * lpcc[n - k]
                    for k in range(1, min(n, p + 1))
                )
                # The direct term exists for every n up to and including p.
                if n <= p:
                    value += lpc[n]
                lpcc[n] = value
            lpcc_coefficients.append(lpcc)
        return lpcc_coefficients
141+
142+
143+
def plot(y, sr):
    """
    Draw a signal against time (in seconds) on a new single-axes figure.

    Parameters
    ----------
    y : np.array
        Signal to draw.
    sr : int
        Sample rate, used to scale the x axis to seconds.
    """
    _, axes = plt.subplots(1)
    n_samples = y.shape[0]
    time_axis = np.linspace(0, n_samples / sr, n_samples)
    axes.plot(time_axis, y)
146+
147+
def writefile(y, sr, filename):
    """
    Write a signal to an audio file via ``soundfile``.

    Parameters
    ----------
    y : np.array
        Samples to write.
    sr : int
        Sample rate of ``y``.
    filename : str
        Destination path.
    """
    sf.write(file=filename, data=y, samplerate=sr)
149+
150+
def main():
    """
    Run the demo pipeline on four speaker recordings and plot their LPCCs.

    The first recording is processed step by step: the intermediate signals
    are plotted and written to disk, and the signal is rescaled after silence
    removal and after clipping. The other three recordings go through noise
    reduction, silence removal, clipping to 8 s, framing, Hann windowing, LPC
    and LPCC without intermediate output. Each speaker's LPCC vectors are
    drawn on a separate row of one figure.
    """

    def lpccs_for(path):
        # Load -> denoise -> trim silence -> clip -> frame -> window -> LPC -> LPCC.
        signal, rate = librosa.load(path)
        denoised = nr.reduce_noise(y=signal, sr=rate, prop_decrease=0.8)
        trimmed = AudioPreprocessor.remove_silence(denoised)
        clipped = AudioPreprocessor.clip(trimmed, rate, 8)
        framed = AudioPreprocessor.framing(clipped, rate, 1000, 100)
        windowed = AudioPreprocessor.windowing(framed, np.hanning)
        return LPCCProcessor.lpcc(LPCCProcessor.lpc(windowed))

    # First speaker: verbose run with plots and intermediate files.
    raw, sr = librosa.load("C:\\Users\\SCU8BH\\Documents\\T3000\\Studienarbeit\\Data\\50_speakers_audio_data\\Speaker_0002\\Speaker_0002_00000.wav")
    plot(raw, sr)

    denoised = nr.reduce_noise(y=raw, sr=sr, prop_decrease=0.8)
    plot(denoised, sr)
    writefile(denoised, sr, "C:\\Users\\SCU8BH\\Documents\\T3000\\remove_noise.wav")

    trimmed = AudioPreprocessor.remove_silence(y=denoised)
    # Restore the peak level the signal had before silence removal.
    rescaled = trimmed * np.max(np.abs(denoised)) / np.max(np.abs(trimmed))
    plot(rescaled, sr)
    writefile(rescaled, sr, "C:\\Users\\SCU8BH\\Documents\\T3000\\remove_silence.wav")

    excerpt = AudioPreprocessor.clip(rescaled, sr, 8)
    excerpt = excerpt / (np.max(np.abs(excerpt)))
    framed = AudioPreprocessor.framing(excerpt, sr, 1000, 100)
    windowed = AudioPreprocessor.windowing(framed, np.hanning)
    lpcs = LPCCProcessor.lpc(windowed)
    lpccs = LPCCProcessor.lpcc(lpcs)

    fig, axs = plt.subplots(4)
    for coefficients in lpccs:
        axs[0].plot(coefficients)
    print(lpcs[0])
    print(len(lpccs))

    # Remaining speakers: one subplot row each, starting at row 1.
    other_speakers = (
        "C:\\Users\\SCU8BH\\Documents\\T3000\\Studienarbeit\\Data\\50_speakers_audio_data\\Speaker_0003\\Speaker_0003_00000.wav",
        "C:\\Users\\SCU8BH\\Documents\\T3000\\Studienarbeit\\Data\\50_speakers_audio_data\\Speaker_0004\\Speaker_0004_00000.wav",
        "C:\\Users\\SCU8BH\\Documents\\T3000\\Studienarbeit\\Data\\50_speakers_audio_data\\Speaker_0005\\Speaker_0005_00000.wav",
    )
    for row, path in enumerate(other_speakers, start=1):
        for coefficients in lpccs_for(path):
            axs[row].plot(coefficients)

    plt.show()
185+
186+
# Run the demo pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)