Skip to content

Commit bf6c617

Browse files
author
Schuler Henry Martin (BhP/HRL3.2-SH1)
committed
Completed windowing testing.
Added preprocessor. Created basic code structure.
1 parent 6959f08 commit bf6c617

File tree

12 files changed

+287
-0
lines changed

12 files changed

+287
-0
lines changed

code/main.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from preprocessing.AudioPreprocessor import AudioPreprocessor
2+
3+
def main():
    """Run ./audio.wav through the preprocessing pipeline and print the frames."""
    windowed = AudioPreprocessor.load_preprocessed_frames(filepath="./audio.wav")
    print(windowed)


# Only run the demo when this file is executed as a script.
if __name__ == "__main__":
    main()
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import numpy as np
2+
import librosa
3+
import librosa.display
4+
import noisereduce as nr
5+
6+
class AudioPreprocessor:
    """
    Static helpers that turn an audio signal into windowed analysis frames.

    ``load_preprocessed_frames`` chains the steps in this order:
    noise reduction -> silence removal -> framing -> windowing.
    """

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Convert an integer sample array into a peak-normalised float array.

        Parameters
        ----------
        array: np.array
            Input samples. Arrays that already have dtype ``type``, or any of
            float16/float32/float64, are returned unchanged.
        type: np.float32
            Floating dtype of the converted result. (The name shadows the
            builtin but is kept for backward compatibility.)

        Returns
        -------
        result : np.array
            ``array`` cast to ``type`` and divided by its absolute peak, so
            the values lie in [-1, 1]. All-zero and empty inputs are only
            cast.
        """
        if array.dtype == type:
            return array

        if array.dtype in (np.float16, np.float32, np.float64):
            return array

        # Cast first: np.abs on the integer array overflows for the most
        # negative value (e.g. abs(int16(-32768)) == -32768), which would
        # give a peak that is too small.
        converted = array.astype(type)
        if converted.size == 0:
            return converted

        peak = np.max(np.abs(converted))
        if peak == 0:
            return converted

        return converted / peak

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Convert a float sample array into an integer array.

        Parameters
        ----------
        array: np.array
            Input samples. Arrays that already have dtype ``type``, or any of
            int16/int32/int64, are returned unchanged. The input is never
            modified.
        type: np.int16
            Integer dtype of the result. (The name shadows the builtin but is
            kept for backward compatibility.)
        divide_max_abs: bool
            If True, normalise by the absolute peak before scaling so the
            loudest sample maps to the largest value of ``type``. If False,
            the samples are scaled directly and out-of-range results are
            clipped instead of wrapping around.

        Returns
        -------
        result : np.array
        """
        if array.dtype == type:
            return array

        if array.dtype in (np.int16, np.int32, np.int64):
            return array

        if array.size == 0:
            return array.astype(type)

        info = np.iinfo(type)
        peak = np.max(np.abs(array))

        if divide_max_abs and peak != 0:
            scaled = array / peak * info.max
        else:
            scaled = array * info.max

        # Clip so values outside [-1, 1] saturate rather than overflow.
        return np.clip(scaled, info.min, info.max).astype(type)

    @staticmethod
    def remove_silence(y, threshold=0.005, pause_length=200, keep_at_start_and_end=50):
        """
        Cut long quiet stretches out of a one-dimensional signal.

        Parameters
        ----------
        y: np.array
            One-dimensional signal.
        threshold: float
            Samples whose absolute value is below this count as quiet.
        pause_length: int
            A quiet run is shortened only if it is longer than this many
            samples. Note that this is a sample count, not milliseconds.
        keep_at_start_and_end: int
            Number of quiet samples kept at each end of a shortened run.

        Returns
        -------
        result : np.array
            Copy of ``y`` without the interior of every long quiet run.
        """
        y = np.asarray(y)
        quiet = np.abs(y) < threshold

        # Pad with "loud" on both sides so every quiet run has a rising and a
        # falling edge, including runs touching the start or end of the signal.
        padded = np.concatenate(([False], quiet, [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_ends = np.flatnonzero(edges == -1)  # exclusive end index

        keep = np.ones(y.shape[0], dtype=bool)
        is_long = (run_ends - run_starts) > pause_length
        for start, end in zip(run_starts[is_long], run_ends[is_long]):
            first = start + keep_at_start_and_end
            last = end - keep_at_start_and_end
            # Guard against margins that overlap (or go negative), where a
            # plain slice would wrap around.
            if first < last:
                keep[first:last] = False

        return y[keep]

    @staticmethod
    def remove_noise(y, sr):
        """
        Reduce noise in ``y`` by calling ``noisereduce.reduce_noise``.

        Parameters
        ----------
        y: np.array
            Signal passed to the noise reducer.
        sr: int
            Sampling rate passed to the noise reducer.

        Returns
        -------
        result : np.array
            Whatever ``nr.reduce_noise`` returns for the signal.
        """
        # prop_decrease 0.8 only reduces noise by 0.8 -> sound quality is better than at 1.0
        return nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)

    @staticmethod
    def create_frames(y, frame_size, overlap):
        """
        Split ``y`` into equally sized, optionally overlapping frames.

        Parameters
        ----------
        y: np.array
            Signal to split along its first axis.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by consecutive frames.

        Returns
        -------
        frames : list of np.array
            Full frames only; a trailing remainder shorter than
            ``frame_size`` is dropped. An empty list is returned when the
            parameters are invalid (``frame_size <= 0``, ``overlap < 0`` or
            ``overlap >= frame_size``).
        """
        if overlap >= frame_size or frame_size <= 0 or overlap < 0:
            return []

        step = frame_size - overlap
        # "+ 1" keeps a last frame that ends exactly at the end of the signal.
        last_start = y.shape[0] - frame_size
        return [y[start: start + frame_size] for start in range(0, last_start + 1, step)]

    @staticmethod
    def window_frames(frames, window_function=np.hanning):
        """
        Multiply every frame with a window of matching length.

        Parameters
        ----------
        frames: iterable of np.array
            Frames to window.
        window_function: callable
            Called with the frame length; must return the window samples.

        Returns
        -------
        windowed_frames : list of np.array
        """
        return [frame * window_function(frame.shape[0]) for frame in frames]

    @staticmethod
    def load_preprocessed_frames(filepath=None, y=None, sr=None, frame_size=1000, overlap=100):
        """
        Run the full preprocessing pipeline and return the windowed frames.

        Parameters
        ----------
        filepath: str
            Audio file loaded with ``librosa.load`` when ``y`` or ``sr`` is
            missing.
        y: np.array
            Already loaded signal; only used together with ``sr``.
        sr: int
            Sampling rate belonging to ``y``.
        frame_size: int
            Frame length in samples.
        overlap: int
            Overlap between consecutive frames in samples.

        Returns
        -------
        windowed_frames : list of np.array

        Raises
        ------
        ValueError
            If neither ``filepath`` nor both ``y`` and ``sr`` are given.
        """
        if filepath is None and (y is None or sr is None):
            raise ValueError("Either filepath or y and sr must be given.")

        if y is None or sr is None:
            y, sr = librosa.load(filepath)

        y = AudioPreprocessor.remove_noise(y=y, sr=sr)
        y = AudioPreprocessor.remove_silence(y=y)

        frames = AudioPreprocessor.create_frames(y=y, frame_size=frame_size, overlap=overlap)
        return AudioPreprocessor.window_frames(frames=frames)
131+
132+
def main():
    """Run ./audio.wav through the preprocessing pipeline and print the frames."""
    windowed = AudioPreprocessor.load_preprocessed_frames(filepath="./audio.wav")
    print(windowed)


# Only run the demo when this file is executed as a script.
if __name__ == '__main__':
    main()
File renamed without changes.
File renamed without changes.
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
from utils.utils import Utils
2+
import numpy as np
3+
import noisereduce as nr
4+
5+
class AudioPreprocessor:
    """
    Static helpers that turn an audio signal into windowed analysis frames.

    ``load_preprocessed_frames`` chains the steps in this order:
    noise reduction -> silence removal -> framing -> windowing.
    """

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Convert an integer sample array into a peak-normalised float array.

        Parameters
        ----------
        array: np.array
            Input samples. Arrays that already have dtype ``type``, or any of
            float16/float32/float64, are returned unchanged.
        type: np.float32
            Floating dtype of the converted result. (The name shadows the
            builtin but is kept for backward compatibility.)

        Returns
        -------
        result : np.array
            ``array`` cast to ``type`` and divided by its absolute peak, so
            the values lie in [-1, 1]. All-zero and empty inputs are only
            cast.
        """
        if array.dtype == type:
            return array

        if array.dtype in (np.float16, np.float32, np.float64):
            return array

        # Cast first: np.abs on the integer array overflows for the most
        # negative value (e.g. abs(int16(-32768)) == -32768), which would
        # give a peak that is too small.
        converted = array.astype(type)
        if converted.size == 0:
            return converted

        peak = np.max(np.abs(converted))
        if peak == 0:
            return converted

        return converted / peak

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Convert a float sample array into an integer array.

        Parameters
        ----------
        array: np.array
            Input samples. Arrays that already have dtype ``type``, or any of
            int16/int32/int64, are returned unchanged. The input is never
            modified.
        type: np.int16
            Integer dtype of the result. (The name shadows the builtin but is
            kept for backward compatibility.)
        divide_max_abs: bool
            If True, normalise by the absolute peak before scaling so the
            loudest sample maps to the largest value of ``type``. If False,
            the samples are scaled directly and out-of-range results are
            clipped instead of wrapping around.

        Returns
        -------
        result : np.array
        """
        if array.dtype == type:
            return array

        if array.dtype in (np.int16, np.int32, np.int64):
            return array

        if array.size == 0:
            return array.astype(type)

        info = np.iinfo(type)
        peak = np.max(np.abs(array))

        if divide_max_abs and peak != 0:
            scaled = array / peak * info.max
        else:
            scaled = array * info.max

        # Clip so values outside [-1, 1] saturate rather than overflow.
        return np.clip(scaled, info.min, info.max).astype(type)

    @staticmethod
    def remove_silence(y, threshold=0.005, pause_length=200, keep_at_start_and_end=50):
        """
        Cut long quiet stretches out of a one-dimensional signal.

        Parameters
        ----------
        y: np.array
            One-dimensional signal.
        threshold: float
            Samples whose absolute value is below this count as quiet.
        pause_length: int
            A quiet run is shortened only if it is longer than this many
            samples. Note that this is a sample count, not milliseconds.
        keep_at_start_and_end: int
            Number of quiet samples kept at each end of a shortened run.

        Returns
        -------
        result : np.array
            Copy of ``y`` without the interior of every long quiet run.
        """
        y = np.asarray(y)
        quiet = np.abs(y) < threshold

        # Pad with "loud" on both sides so every quiet run has a rising and a
        # falling edge, including runs touching the start or end of the signal.
        padded = np.concatenate(([False], quiet, [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_ends = np.flatnonzero(edges == -1)  # exclusive end index

        keep = np.ones(y.shape[0], dtype=bool)
        is_long = (run_ends - run_starts) > pause_length
        for start, end in zip(run_starts[is_long], run_ends[is_long]):
            first = start + keep_at_start_and_end
            last = end - keep_at_start_and_end
            # Guard against margins that overlap (or go negative), where a
            # plain slice would wrap around.
            if first < last:
                keep[first:last] = False

        return y[keep]

    @staticmethod
    def remove_noise(y, sr):
        """
        Reduce noise in ``y`` by calling ``noisereduce.reduce_noise``.

        Parameters
        ----------
        y: np.array
            Signal passed to the noise reducer.
        sr: int
            Sampling rate passed to the noise reducer.

        Returns
        -------
        result : np.array
            Whatever ``nr.reduce_noise`` returns for the signal.
        """
        # prop_decrease 0.8 only reduces noise by 0.8 -> sound quality is better than at 1.0
        return nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)

    @staticmethod
    def create_frames(y, frame_size, overlap):
        """
        Split ``y`` into equally sized, optionally overlapping frames.

        Parameters
        ----------
        y: np.array
            Signal to split along its first axis.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by consecutive frames.

        Returns
        -------
        frames : list of np.array
            Full frames only; a trailing remainder shorter than
            ``frame_size`` is dropped. An empty list is returned when the
            parameters are invalid (``frame_size <= 0``, ``overlap < 0`` or
            ``overlap >= frame_size``).
        """
        if overlap >= frame_size or frame_size <= 0 or overlap < 0:
            return []

        step = frame_size - overlap
        # "+ 1" keeps a last frame that ends exactly at the end of the signal.
        last_start = y.shape[0] - frame_size
        return [y[start: start + frame_size] for start in range(0, last_start + 1, step)]

    @staticmethod
    def window_frames(frames, window_function=np.hanning):
        """
        Multiply every frame with a window of matching length.

        Parameters
        ----------
        frames: iterable of np.array
            Frames to window.
        window_function: callable
            Called with the frame length; must return the window samples.

        Returns
        -------
        windowed_frames : list of np.array
        """
        return [frame * window_function(frame.shape[0]) for frame in frames]

    @staticmethod
    def load_preprocessed_frames(filepath=None, y=None, sr=None, frame_size=1000, overlap=100):
        """
        Run the full preprocessing pipeline and return the windowed frames.

        Parameters
        ----------
        filepath: str
            Audio file loaded with ``Utils.load_file`` when ``y`` or ``sr``
            is missing.
        y: np.array
            Already loaded signal; only used together with ``sr``.
        sr: int
            Sampling rate belonging to ``y``.
        frame_size: int
            Frame length in samples.
        overlap: int
            Overlap between consecutive frames in samples.

        Returns
        -------
        windowed_frames : list of np.array

        Raises
        ------
        ValueError
            If neither ``filepath`` nor both ``y`` and ``sr`` are given.
        """
        if filepath is None and (y is None or sr is None):
            raise ValueError("Either filepath or y and sr must be given.")

        if y is None or sr is None:
            y, sr = Utils.load_file(filepath)

        y = AudioPreprocessor.remove_noise(y=y, sr=sr)
        y = AudioPreprocessor.remove_silence(y=y)

        frames = AudioPreprocessor.create_frames(y=y, frame_size=frame_size, overlap=overlap)
        return AudioPreprocessor.window_frames(frames=frames)
130+
131+
def main():
    """Run ./audio.wav through the preprocessing pipeline and print the frames."""
    windowed = AudioPreprocessor.load_preprocessed_frames(filepath="./audio.wav")
    print(windowed)


# Only run the demo when this file is executed as a script.
if __name__ == '__main__':
    main()
3.53 KB
Binary file not shown.
493 Bytes
Binary file not shown.

code/utils/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import librosa
2+
3+
class Utils:
    """Namespace for small shared helper functions."""

    @staticmethod
    def load_file(file_path):
        """Load ``file_path`` with ``librosa.load`` and return its result unchanged."""
        loaded = librosa.load(file_path)
        return loaded

0 commit comments

Comments
 (0)