schuler-henry
diff --git a/‎code/main.py‎
Lines changed: 8 additions & 0 deletions b/‎code/main.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎code/playground/windowing/AudioPreprocessor.py‎
Lines changed: 137 additions & 0 deletions b/‎code/playground/windowing/AudioPreprocessor.py‎
Lines changed: 137 additions & 0 deletions
diff --git a/‎code/playground/windowing/__pycache__/fileHandler.cpython-310.pyc‎
1.99 KB b/‎code/playground/windowing/__pycache__/fileHandler.cpython-310.pyc‎
1.99 KB
diff --git a/‎code/playground/windowing/fileHandler.py‎
Lines changed: 47 additions & 0 deletions b/‎code/playground/windowing/fileHandler.py‎
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,8 @@
+from preprocessing.AudioPreprocessor import AudioPreprocessor
+
+def main():
+    """Preprocess ``./audio.wav`` into windowed frames and print them."""
+    windowed = AudioPreprocessor.load_preprocessed_frames(filepath="./audio.wav")
+    print(windowed)
+    
+# Run the demo only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,137 @@
+import numpy as np
+import librosa
+import librosa.display
+import noisereduce as nr
+
+class AudioPreprocessor:
+    """
+    Static helpers that turn a raw audio signal into windowed frames.
+    load_preprocessed_frames chains them: noise reduction, silence removal,
+    framing, windowing.
+    """
+
+    @staticmethod
+    def int_to_float(array, type=np.float32):
+        """
+        Change an integer np.array into a peak-normalised float array
+        Parameters
+        ----------
+        array: np.array
+        type: float dtype of the result (default np.float32)
+        Returns
+        -------
+        result : np.array
+            Values divided by the largest absolute value, so they lie in [-1, 1].
+            An array that already has dtype `type`, or any float16/32/64 array,
+            is returned unchanged. An all-zero or empty array is only cast.
+        """
+
+        if array.dtype == type:
+            return array
+
+        # Float input is passed through untouched, whatever its precision.
+        if array.dtype in [np.float16, np.float32, np.float64]:
+            return array
+
+        # Cast before taking the absolute value: np.abs on the integer array
+        # overflows for the most negative value (e.g. -32768 in int16).
+        converted = array.astype(type)
+        if converted.size == 0:
+            return converted
+
+        peak = np.max(np.abs(converted))
+        if peak == 0:
+            return converted
+
+        return converted / peak
+
+    @staticmethod
+    def float_to_int(array, type=np.int16, divide_max_abs=True):
+        """
+        Change np.array float32 / float64 into an integer array
+        Parameters
+        ----------
+        array: np.array
+        type: integer dtype of the result (default np.int16)
+        divide_max_abs: bool
+            If True the signal is first normalised by its largest absolute
+            value, so the peak maps to the maximum of `type`. If False the
+            values are only multiplied by that maximum.
+        Returns
+        -------
+        result : np.array
+            Scaled values cast to `type` (the cast truncates toward zero).
+            An array that already has dtype `type`, or any other integer
+            array, is returned unchanged. The input is never modified.
+        """
+
+        if array.dtype == type:
+            return array
+
+        # Integer input of any width is passed through untouched.
+        if np.issubdtype(array.dtype, np.integer):
+            return array
+
+        if array.size == 0:
+            return array.astype(type)
+
+        full_scale = np.iinfo(type).max
+        peak = np.max(np.abs(array))
+
+        # An all-zero signal cannot be normalised; scaling it keeps it zero.
+        if divide_max_abs and peak != 0:
+            scaled = array / peak * full_scale
+        else:
+            scaled = array * full_scale
+
+        return scaled.astype(type)
+
+    @staticmethod
+    def remove_silence(y, threshold=0.005, min_silence_samples=200, keep_samples=50):
+        """
+        Cut long quiet stretches out of a 1-D signal
+        Parameters
+        ----------
+        y: np.array
+            1-D signal.
+        threshold: float
+            Samples with an absolute amplitude below this count as quiet.
+        min_silence_samples: int
+            A quiet run is only shortened if it is longer than this many
+            samples (a sample count, not a duration).
+        keep_samples: int
+            Number of quiet samples kept at the start and at the end of every
+            shortened run.
+        Returns
+        -------
+        result : np.array
+            Copy of y without the removed samples.
+        """
+        y = np.asarray(y)
+        quiet = np.abs(y) < threshold
+
+        # Pad with False on both sides so that runs touching the signal
+        # borders also produce a rising and a falling edge.
+        padded = np.concatenate(([False], quiet, [False])).astype(np.int8)
+        edges = np.diff(padded)
+        run_starts = np.flatnonzero(edges == 1)
+        run_ends = np.flatnonzero(edges == -1)  # exclusive end index
+
+        keep_mask = np.ones(y.shape[0], dtype=bool)
+        for start, end in zip(run_starts, run_ends):
+            if end - start > min_silence_samples:
+                lo, hi = start + keep_samples, end - keep_samples
+                # Guard against an empty or inverted range, which a slice
+                # with a negative bound would otherwise misinterpret.
+                if hi > lo:
+                    keep_mask[lo:hi] = False
+
+        return y[keep_mask]
+
+    @staticmethod
+    def remove_noise(y, sr, prop_decrease=0.8):
+        """
+        Reduce noise with noisereduce
+        Parameters
+        ----------
+        y: np.array
+        sr: sampling rate of y
+        prop_decrease: float
+            Forwarded to nr.reduce_noise.
+        Returns
+        -------
+        result : np.array
+        """
+        # prop_decrease 0.8 only reduces noise by 0.8 -> sound quality is better than at 1.0
+        return nr.reduce_noise(y=y, sr=sr, prop_decrease=prop_decrease)
+
+    @staticmethod
+    def create_frames(y, frame_size, overlap):
+        """
+        Split a signal into equally sized, optionally overlapping frames
+        Parameters
+        ----------
+        y: np.array
+        frame_size: int
+            Number of samples per frame.
+        overlap: int
+            Number of samples shared by two consecutive frames.
+        Returns
+        -------
+        result : list of np.array
+            Only complete frames are returned; a trailing remainder shorter
+            than frame_size is dropped. An empty list is returned when the
+            parameters are invalid (frame_size <= 0, overlap < 0 or
+            overlap >= frame_size).
+        """
+        if overlap >= frame_size or frame_size <= 0 or overlap < 0:
+            return []
+
+        hop = frame_size - overlap
+        # Last start index that still leaves room for a complete frame; the
+        # "+ 1" below keeps a frame that ends exactly at the end of the signal.
+        last_start = y.shape[0] - frame_size
+
+        return [y[start: start + frame_size] for start in range(0, last_start + 1, hop)]
+
+    @staticmethod
+    def window_frames(frames, window_function=np.hanning):
+        """
+        Multiply every frame with a window of matching length
+        Parameters
+        ----------
+        frames: iterable of np.array
+        window_function: callable
+            Called with the frame length, must return the window samples.
+        Returns
+        -------
+        result : list of np.array
+        """
+        return [frame * window_function(frame.shape[0]) for frame in frames]
+
+    @staticmethod
+    def load_preprocessed_frames(filepath=None, y=None, sr=None, frame_size=1000, overlap=100):
+        """
+        Run the full preprocessing chain and return the windowed frames
+        Parameters
+        ----------
+        filepath: path of an audio file, loaded with librosa.load
+        y: np.array
+            Already loaded signal; only used when sr is given as well.
+        sr: sampling rate of y
+        frame_size: int
+            Samples per frame.
+        overlap: int
+            Samples shared by consecutive frames.
+        Returns
+        -------
+        result : list of np.array
+        Raises
+        ------
+        ValueError
+            If neither filepath nor both y and sr are given.
+        """
+        if filepath is None and (y is None or sr is None):
+            raise ValueError("Either filepath or y and sr must be given.")
+
+        # The file is loaded whenever y or sr is missing, even if the other
+        # one was passed.
+        if y is None or sr is None:
+            y, sr = librosa.load(filepath)
+
+        y = AudioPreprocessor.remove_noise(y=y, sr=sr)
+        y = AudioPreprocessor.remove_silence(y=y)
+
+        frames = AudioPreprocessor.create_frames(y=y, frame_size=frame_size, overlap=overlap)
+
+        return AudioPreprocessor.window_frames(frames=frames)
+
+def main():
+    """Preprocess ``./audio.wav`` into windowed frames and print them."""
+    windowed = AudioPreprocessor.load_preprocessed_frames(filepath="./audio.wav")
+    print(windowed)
+
+# Run the demo only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,47 @@
+from tracemalloc import start
+import librosa
+import numpy as np
+import matplotlib.pyplot as plt
+
+class FileHandler:
+  """
+  Loads an audio file with librosa and offers small analysis helpers
+  (framing, plotting, autocorrelation, LPC) on the loaded signal.
+  """
+
+  def __init__(self, filepath):
+    """Load the file at filepath and print its duration in seconds."""
+    self.y, self.sampling_rate = librosa.load(filepath)
+    # duration = number of samples / samples per second
+    self.total_time = self.y.size / self.sampling_rate
+
+    print(self.total_time)
+
+  def get_sampling_rate(self):
+    """Return the sampling rate reported by librosa.load."""
+    return self.sampling_rate
+
+  def get_frame(self, frame_time, start_frame):
+    """
+    Cut one frame out of the signal.
+
+    frame_time is the frame duration in seconds, start_frame the index of
+    the first sample. Returns (frame, number_of_samples_in_frame). Near the
+    end of the signal the frame is shorter than requested, and the returned
+    count reflects the actual length so it can be used to build a window.
+    """
+    requested = int(self.sampling_rate * frame_time)
+    frame = self.y[start_frame:(start_frame + requested)]
+    return frame, frame.size
+
+  def view(self):
+    """Plot the whole signal."""
+    plt.plot(np.linspace(0, self.y.size, self.y.size), self.y)
+    plt.show()
+
+  def autocorrelate(self, frame_size, start_frame=3200):
+    """
+    Compare an FFT based autocorrelation with librosa.autocorrelate.
+
+    frame_size is the frame duration in seconds (it is multiplied by the
+    sampling rate), start_frame the index of the first sample. Prints and
+    plots both results for the unwindowed frame and returns
+    librosa.autocorrelate of the Hann-windowed frame.
+    """
+    frame_y, frame_frames = self.get_frame(frame_size, start_frame)
+    Fr = np.fft.fft(frame_y)
+    # Spectrum times its conjugate; the inverse FFT of this product is the
+    # FFT based autocorrelation compared against librosa below.
+    S = Fr * np.conjugate(Fr)
+    print(Fr)
+
+    # Compute each result once instead of once per print/plot.
+    fft_autocorr = np.fft.ifft(S)
+    lib_autocorr = librosa.autocorrelate(frame_y)
+
+    print(abs(fft_autocorr)[:10])
+    print(abs(fft_autocorr).size)
+
+    print(lib_autocorr[:10])
+    print(lib_autocorr.size)
+
+    x_axis = np.linspace(0, frame_frames, frame_frames)
+    plt.plot(x_axis, frame_y)
+    plt.show()
+    # Plot the real part explicitly; matplotlib cannot draw complex values.
+    plt.plot(x_axis, fft_autocorr.real)
+    plt.plot(x_axis, lib_autocorr)
+    plt.show()
+    return librosa.autocorrelate(frame_y * np.hanning(frame_frames))
+
+  def get_lpc(self, frame_time, order, start_frame=3200):
+    """
+    Return librosa.lpc coefficients of the given order for one
+    Hann-windowed frame of frame_time seconds starting at start_frame.
+    """
+    frame_y, frame_frames = self.get_frame(frame_time, start_frame)
+
+    return librosa.lpc(frame_y * np.hanning(frame_frames), order=order)