1+ import numpy as np
2+ import matplotlib .pyplot as plt
3+ import librosa
4+ import librosa .display
5+ import soundfile as sf
6+ import scipy
7+ import os
8+ import math
9+ import random
10+ import pandas as pd
11+ from tabulate import tabulate
12+ from pydub import AudioSegment
13+ from pydub .silence import split_on_silence
14+ import noisereduce as nr
15+
class AudioPreprocessor:
    """Stateless helpers that prepare a 1-D audio signal for feature extraction.

    Every method is a ``staticmethod``; the class only serves as a namespace.
    """

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Peak-normalise an integer array into a floating point array.

        The result is scaled by the largest absolute sample, so a non-silent
        input always peaks at exactly +/-1.0.

        Parameters
        ----------
        array: np.array
            Input samples. Arrays that already have dtype ``type`` and arrays
            of dtype float16/float32/float64 are returned unchanged.
        type: np.float32
            Floating point dtype of the result.

        Returns
        -------
        result : np.array
        """
        if array.dtype == type:
            return array

        # Floating point input of another precision is passed through as is.
        if array.dtype in (np.float16, np.float32, np.float64):
            return array

        # Convert BEFORE taking the absolute value: abs() on e.g. int16 wraps
        # around for -32768 and would yield a wrong (negative) peak.
        converted = array.astype(type)
        if converted.size == 0:
            return converted

        peak = np.max(np.abs(converted))
        if peak != 0:
            # astype() returned a fresh copy, so scaling in place is safe.
            converted /= peak
        return converted

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Scale a floating point array to the range of an integer dtype.

        Samples are multiplied by ``np.iinfo(type).max`` and truncated towards
        zero. The input array is never modified.

        Parameters
        ----------
        array: np.array
            Input samples. Arrays that already have dtype ``type`` and arrays
            of dtype int16/int32/int64 are returned unchanged.
        type: np.int16
            Integer dtype of the result.
        divide_max_abs: bool
            If True, divide by the largest absolute sample first so the result
            uses the full integer range. If False, the samples are expected to
            lie in [-1, 1]; anything outside is clipped to the integer range.

        Returns
        -------
        result : np.array
        """
        if array.dtype == type:
            return array

        # Integer input of another width is passed through as is.
        if array.dtype in (np.int16, np.int32, np.int64):
            return array

        limits = np.iinfo(type)
        # Work in float64 so the scaling itself does not lose precision.
        samples = np.asarray(array, dtype=np.float64)

        if divide_max_abs and samples.size:
            peak = np.max(np.abs(samples))
            if peak != 0:  # an all-zero signal stays all-zero
                samples = samples / peak

        # Clip so out-of-range values saturate instead of wrapping on the cast.
        scaled = np.clip(samples * limits.max, limits.min, limits.max)
        return scaled.astype(type)

    @staticmethod
    def remove_silence(y, threshold=0.0005, pause_length=800,
                       keep_at_start_and_end=200):
        """
        Shorten long quiet stretches of a 1-D signal.

        A sample is quiet when its absolute value is below ``threshold``.
        Every run of more than ``pause_length`` consecutive quiet samples is
        cut down to its first and last ``keep_at_start_and_end`` samples.

        Parameters
        ----------
        y: np.array
            1-D signal.
        threshold: float
            Amplitude below which a sample counts as quiet.
        pause_length: int
            Minimum run length, in samples, before a run is shortened.
        keep_at_start_and_end: int
            Number of samples kept at each edge of a shortened run.

        Returns
        -------
        result : np.array
            Copy of ``y`` without the removed samples.
        """
        y = np.asarray(y)
        quiet = np.abs(y) < threshold

        # Pad with False so runs touching either end also produce two edges.
        padded = np.concatenate(([False], quiet, [False]))
        edges = np.flatnonzero(padded[1:] != padded[:-1])
        run_starts, run_ends = edges[::2], edges[1::2]  # ends are exclusive

        keep = np.ones(y.shape[0], dtype=bool)
        for start, end in zip(run_starts, run_ends):
            if end - start <= pause_length:
                continue
            first = start + keep_at_start_and_end
            last = end - keep_at_start_and_end
            if last > first:  # guards against a negative (wrapping) slice
                keep[first:last] = False

        return y[keep]

    @staticmethod
    def clip(y, sr, seconds):
        """
        Return the first ``seconds`` seconds of ``y``.

        Parameters
        ----------
        y: np.array
            Signal.
        sr: int
            Sample rate in samples per second.
        seconds: int or float
            Length to keep; fractional values are truncated to whole samples.

        Returns
        -------
        result : np.array
            Shorter than requested if ``y`` itself is shorter.
        """
        # int() keeps fractional durations from producing a float slice index.
        n_samples = max(int(seconds * sr), 0)
        return y[0:n_samples]

    @staticmethod
    def framing(y, sr, frame_size, overlap):
        """
        Split ``y`` into equally sized, optionally overlapping frames.

        Parameters
        ----------
        y: np.array
            Signal.
        sr: int
            Sample rate; not used, kept for interface compatibility.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by two consecutive frames.

        Returns
        -------
        result : list of np.array
            Full frames only; a trailing partial frame is dropped. Empty if
            the arguments are invalid (``frame_size <= 0``, ``overlap < 0``
            or ``overlap >= frame_size``).
        """
        if overlap >= frame_size or frame_size <= 0 or overlap < 0:
            return []

        hop = frame_size - overlap
        # +1 so a frame ending exactly on the last sample is still included.
        last_start = y.shape[0] - frame_size
        return [y[start:start + frame_size]
                for start in range(0, last_start + 1, hop)]

    @staticmethod
    def windowing(frames, window):
        """
        Multiply every frame by a window of matching length.

        Parameters
        ----------
        frames: iterable of np.array
            Frames to window.
        window: callable
            Called with the frame length and expected to return the window
            values, e.g. ``np.hanning``.

        Returns
        -------
        result : list of np.array
        """
        return [frame * window(frame.shape[0]) for frame in frames]
117+
class LPCCProcessor:
    """Stateless helpers that turn frames into LPC and LPCC feature vectors."""

    @staticmethod
    def lpc(frames, order=12):
        """
        Compute linear prediction coefficients for every frame.

        Parameters
        ----------
        frames: iterable of np.array
            Frames to analyse.
        order: int
            Order of the linear filter passed to ``librosa.lpc``.

        Returns
        -------
        result : list
            One ``librosa.lpc`` result per frame, in input order.
        """
        # ``order`` must be passed by keyword: newer librosa releases reject
        # it as a positional argument.
        return [librosa.lpc(frame, order=order) for frame in frames]

    @staticmethod
    def lpcc(lpcs, order=12):
        """
        Convert LPC vectors into cepstral coefficients (LPCC).

        For every input vector ``a`` the result ``c`` is built as
        ``c[0] = a[0]`` and, for ``n >= 1``::

            c[n] = a[n] + sum((1 - k / n) * a[k] * c[n - k] for k in 1..n-1)

        where ``a[k]`` is treated as 0 for indices beyond the end of ``a``.
        This allows ``order`` to exceed the LPC order.

        Parameters
        ----------
        lpcs: iterable of np.array
            LPC vectors, each with at least one element.
        order: int
            Highest cepstral index to compute.

        Returns
        -------
        result : list of np.array
            One vector of length ``order + 1`` per input vector.
        """
        lpcc_coefficients = []
        for lpc in lpcs:
            lpc = np.asarray(lpc)
            n_lpc = lpc.shape[0]

            lpcc = np.zeros(order + 1)
            lpcc[0] = lpc[0]
            for n in range(1, order + 1):
                # Only k < n_lpc contributes; higher LPC terms do not exist.
                value = sum((1 - k / n) * lpc[k] * lpcc[n - k]
                            for k in range(1, min(n, n_lpc)))
                if n < n_lpc:  # includes the last available coefficient
                    value += lpc[n]
                lpcc[n] = value
            lpcc_coefficients.append(lpcc)
        return lpcc_coefficients
141+
142+
def plot(y, sr):
    """
    Draw the waveform of ``y`` over time on a new matplotlib figure.

    The figure is only created here; nothing is displayed until the caller
    invokes ``plt.show()``.

    Parameters
    ----------
    y: np.array
        Signal to plot.
    sr: int
        Sample rate, used to scale the x axis to seconds.
    """
    n_samples = y.shape[0]
    time_axis = np.linspace(0, n_samples / sr, n_samples)
    _, axis = plt.subplots(1)
    axis.plot(time_axis, y)
146+
def writefile(y, sr, filename):
    """
    Write the signal ``y`` to ``filename`` using ``soundfile``.

    Parameters
    ----------
    y: np.array
        Signal to store.
    sr: int
        Sample rate written to the file.
    filename: str
        Destination path.
    """
    sf.write(file=filename, data=y, samplerate=sr)
149+
def _lpcc_features(wav_path):
    """Load ``wav_path`` and return the LPCC vectors of its first 8 seconds.

    Steps: noise reduction, silence removal, clipping to 8 s, framing
    (1000 samples, 100 overlap), Hann windowing, LPC, LPCC.
    """
    y, sr = librosa.load(wav_path)
    denoised = nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)
    trimmed = AudioPreprocessor.remove_silence(denoised)
    clipped = AudioPreprocessor.clip(trimmed, sr, 8)
    frames = AudioPreprocessor.framing(clipped, sr, 1000, 100)
    windowed = AudioPreprocessor.windowing(frames, np.hanning)
    return LPCCProcessor.lpcc(LPCCProcessor.lpc(windowed))


def main():
    """Demo: plot the LPCC vectors of one recording for each of four speakers.

    The first recording is processed step by step, with waveform plots and
    intermediate WAV files written after noise reduction and silence removal.
    The remaining three only run the feature pipeline.
    """
    # NOTE(review): the space after every backslash in these paths looks like
    # a copy/paste artefact - confirm against the real directory layout.
    y, sr = librosa.load("C:\\ Users\\ SCU8BH\\ Documents\\ T3000\\ Studienarbeit\\ Data\\ 50_speakers_audio_data\\ Speaker_0002\\ Speaker_0002_00000.wav")
    plot(y, sr)

    y = nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)
    plot(y, sr)
    writefile(y, sr, "C:\\ Users\\ SCU8BH\\ Documents\\ T3000\\ remove_noise.wav")

    y_ = AudioPreprocessor.remove_silence(y=y)
    # Rescale so the shortened signal keeps the peak level it had before.
    y = y_ * np.max(np.abs(y)) / np.max(np.abs(y_))
    plot(y, sr)
    writefile(y, sr, "C:\\ Users\\ SCU8BH\\ Documents\\ T3000\\ remove_silence.wav")

    clip = AudioPreprocessor.clip(y, sr, 8)
    clip = clip / (np.max(np.abs(clip)))
    frames = AudioPreprocessor.framing(clip, sr, 1000, 100)
    windowed_frames = AudioPreprocessor.windowing(frames, np.hanning)
    lpcs = LPCCProcessor.lpc(windowed_frames)
    lpccs = LPCCProcessor.lpcc(lpcs)

    # One subplot per speaker; every frame's LPCC vector is one line.
    _, axs = plt.subplots(4)
    for lpcc in lpccs:
        axs[0].plot(lpcc)
    print(lpcs[0])
    print(len(lpccs))

    other_speakers = (
        "C:\\ Users\\ SCU8BH\\ Documents\\ T3000\\ Studienarbeit\\ Data\\ 50_speakers_audio_data\\ Speaker_0003\\ Speaker_0003_00000.wav",
        "C:\\ Users\\ SCU8BH\\ Documents\\ T3000\\ Studienarbeit\\ Data\\ 50_speakers_audio_data\\ Speaker_0004\\ Speaker_0004_00000.wav",
        "C:\\ Users\\ SCU8BH\\ Documents\\ T3000\\ Studienarbeit\\ Data\\ 50_speakers_audio_data\\ Speaker_0005\\ Speaker_0005_00000.wav",
    )
    for axis, wav_path in zip(axs[1:], other_speakers):
        for lpcc in _lpcc_features(wav_path):
            axis.plot(lpcc)

    plt.show()
185+
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
0 commit comments