schuler-henry
diff --git a/‎code/main.py‎
Lines changed: 8 additions & 0 deletions b/‎code/main.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎code/playground/windowing/AudioPreprocessor.py‎
Lines changed: 137 additions & 0 deletions b/‎code/playground/windowing/AudioPreprocessor.py‎
Lines changed: 137 additions & 0 deletions
diff --git a/‎playground/windowing/__pycache__/fileHandler.cpython-310.pyc‎ renamed to ‎code/playground/windowing/__pycache__/fileHandler.cpython-310.pyc‎ b/‎playground/windowing/__pycache__/fileHandler.cpython-310.pyc‎ renamed to ‎code/playground/windowing/__pycache__/fileHandler.cpython-310.pyc‎
diff --git a/‎playground/windowing/fileHandler.py‎ renamed to ‎code/playground/windowing/fileHandler.py‎ b/‎playground/windowing/fileHandler.py‎ renamed to ‎code/playground/windowing/fileHandler.py‎
diff --git a/‎playground/windowing/main.py‎ renamed to ‎code/playground/windowing/main.py‎ b/‎playground/windowing/main.py‎ renamed to ‎code/playground/windowing/main.py‎
diff --git a/‎playground/windowing/pause-reduction.py‎ renamed to ‎code/playground/windowing/pause-reduction.py‎ b/‎playground/windowing/pause-reduction.py‎ renamed to ‎code/playground/windowing/pause-reduction.py‎
diff --git a/‎code/preprocessing/AudioPreprocessor.py‎
Lines changed: 136 additions & 0 deletions b/‎code/preprocessing/AudioPreprocessor.py‎
Lines changed: 136 additions & 0 deletions
diff --git a/‎code/preprocessing/__pycache__/AudioPreprocessor.cpython-310.pyc‎
3.53 KB b/‎code/preprocessing/__pycache__/AudioPreprocessor.cpython-310.pyc‎
3.53 KB
diff --git a/‎code/utils/__pycache__/utils.cpython-310.pyc‎
493 Bytes b/‎code/utils/__pycache__/utils.cpython-310.pyc‎
493 Bytes
diff --git a/‎code/utils/utils.py‎
Lines changed: 6 additions & 0 deletions b/‎code/utils/utils.py‎
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,8 @@
+from preprocessing.AudioPreprocessor import AudioPreprocessor
+
def main():
    """Preprocess ``./audio.wav`` and print the resulting windowed frames."""
    print(AudioPreprocessor.load_preprocessed_frames("./audio.wav"))


# Only run the demo when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,137 @@
+import numpy as np
+import librosa
+import librosa.display
+import noisereduce as nr
+
class AudioPreprocessor:
    """Stateless helpers that turn an audio signal into windowed frames.

    ``load_preprocessed_frames`` chains the steps in this order:
    noise reduction -> silence removal -> framing -> windowing.
    """

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Convert an integer np.array into a float array scaled to [-1, 1].

        Parameters
        ----------
        array: np.array
            Signal to convert. Arrays that already have a floating-point
            dtype are returned unchanged.
        type: np.float32
            Floating-point dtype of the result. The name shadows the builtin
            but is kept so existing keyword callers keep working.

        Returns
        -------
        result : np.array
            ``array`` divided by its peak absolute value. An all-zero or
            empty input is only cast, never divided.
        """
        array = np.asarray(array)
        if array.dtype == type or np.issubdtype(array.dtype, np.floating):
            return array

        # Cast first: np.abs on a signed integer array wraps around for the
        # most negative value (e.g. -32768 for int16), which corrupts the peak.
        converted = array.astype(type)
        if converted.size == 0:
            return converted

        peak = np.max(np.abs(converted))
        if peak == 0:
            return converted

        return converted / peak

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Convert a float np.array into an integer array using the full range
        of the target dtype.

        Parameters
        ----------
        array: np.array
            Signal to convert; it is never modified in place. Arrays that
            already have an integer dtype are returned unchanged.
        type: np.int16
            Integer dtype of the result.
        divide_max_abs: bool
            If True the signal is first normalized by its peak absolute
            value. If False the signal is scaled as is and values outside
            [-1, 1] are clipped to the limits of ``type``.

        Returns
        -------
        result : np.array
        """
        array = np.asarray(array)
        if array.dtype == type or np.issubdtype(array.dtype, np.integer):
            return array

        if array.size == 0:
            return array.astype(type)

        limits = np.iinfo(type)
        peak = np.max(np.abs(array))
        if divide_max_abs and peak != 0:
            array = array / peak

        # Clip before casting so that out-of-range samples saturate instead
        # of wrapping around in the integer cast.
        scaled = np.clip(array * limits.max, limits.min, limits.max)

        return scaled.astype(type)

    @staticmethod
    def remove_silence(y, threshold=0.005, pause_length=200, keep_at_start_and_end=50):
        """
        Shorten long quiet passages of a one-dimensional signal.

        A quiet passage is a run of consecutive samples whose absolute value
        is below ``threshold``. Runs longer than ``pause_length`` are cut
        down to their first and last ``keep_at_start_and_end`` samples.

        Parameters
        ----------
        y: np.array
            One-dimensional signal.
        threshold: float
            Amplitude below which a sample counts as quiet.
        pause_length: int
            Run length, counted in samples (not milliseconds), that a quiet
            run has to exceed before it is shortened.
        keep_at_start_and_end: int
            Number of quiet samples kept at each end of a shortened run.

        Returns
        -------
        result : np.array
            Copy of ``y`` without the removed samples.

        Raises
        ------
        ValueError
            If ``keep_at_start_and_end`` is negative.
        """
        if keep_at_start_and_end < 0:
            raise ValueError("keep_at_start_and_end must not be negative.")

        y = np.asarray(y)
        quiet = np.abs(y) < threshold

        # Pad with False on both sides so runs touching either end of the
        # signal still produce a rising and a falling edge.
        padded = np.concatenate(([False], quiet, [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_ends = np.flatnonzero(edges == -1)  # exclusive end index

        keep_mask = np.ones(y.shape[0], dtype=bool)
        for start, end in zip(run_starts, run_ends):
            if end - start > pause_length:
                keep_mask[start + keep_at_start_and_end:end - keep_at_start_and_end] = False

        return y[keep_mask]

    @staticmethod
    def remove_noise(y, sr):
        """
        Reduce noise in a signal with ``noisereduce``.

        Parameters
        ----------
        y: np.array
            Signal to clean.
        sr: int
            Sampling rate of ``y``.

        Returns
        -------
        result : np.array
        """
        # prop_decrease=0.8 reduces the noise only partially; the sound
        # quality is better than with a full reduction at 1.0.
        return nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)

    @staticmethod
    def create_frames(y, frame_size, overlap):
        """
        Split a signal into equally sized, optionally overlapping frames.

        Parameters
        ----------
        y: np.array
            Signal to split.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by two consecutive frames.

        Returns
        -------
        result : list of np.array
            All complete frames, including one that ends exactly on the last
            sample. Trailing samples that do not fill a frame are dropped.
            The list is empty when the parameters are invalid
            (``frame_size <= 0``, ``overlap < 0`` or ``overlap >= frame_size``).
        """
        if frame_size <= 0 or overlap < 0 or overlap >= frame_size:
            return []

        hop = frame_size - overlap
        last_start = y.shape[0] - frame_size

        return [y[start:start + frame_size] for start in range(0, last_start + 1, hop)]

    @staticmethod
    def window_frames(frames, window_function=np.hanning):
        """
        Multiply every frame with a window of matching length.

        Parameters
        ----------
        frames: list of np.array
            Frames to window.
        window_function: callable
            Called with the frame length, returns the window to apply.

        Returns
        -------
        result : list of np.array
        """
        return [frame * window_function(frame.shape[0]) for frame in frames]

    @staticmethod
    def load_preprocessed_frames(filepath=None, y=None, sr=None, frame_size=1000, overlap=100):
        """
        Run the whole preprocessing pipeline and return windowed frames.

        Parameters
        ----------
        filepath: str
            Audio file to load with ``librosa.load``. It is read whenever
            ``y`` or ``sr`` is missing.
        y: np.array
            Already loaded signal; only used together with ``sr``.
        sr: int
            Sampling rate of ``y``.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by two consecutive frames.

        Returns
        -------
        result : list of np.array

        Raises
        ------
        ValueError
            If neither ``filepath`` nor both ``y`` and ``sr`` are given.
        """
        signal_missing = y is None or sr is None
        if filepath is None and signal_missing:
            raise ValueError("Either filepath or y and sr must be given.")

        if signal_missing:
            y, sr = librosa.load(filepath)

        y = AudioPreprocessor.remove_noise(y=y, sr=sr)
        y = AudioPreprocessor.remove_silence(y=y)

        frames = AudioPreprocessor.create_frames(y=y, frame_size=frame_size, overlap=overlap)

        return AudioPreprocessor.window_frames(frames=frames)
+
def main():
    """Preprocess ``./audio.wav`` and print the resulting windowed frames."""
    print(AudioPreprocessor.load_preprocessed_frames("./audio.wav"))


# Only run the demo when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,136 @@
+from utils.utils import Utils
+import numpy as np
+import noisereduce as nr
+
class AudioPreprocessor:
    """Stateless helpers that turn an audio signal into windowed frames.

    ``load_preprocessed_frames`` chains the steps in this order:
    noise reduction -> silence removal -> framing -> windowing.
    """

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Convert an integer np.array into a float array scaled to [-1, 1].

        Parameters
        ----------
        array: np.array
            Signal to convert. Arrays that already have a floating-point
            dtype are returned unchanged.
        type: np.float32
            Floating-point dtype of the result. The name shadows the builtin
            but is kept so existing keyword callers keep working.

        Returns
        -------
        result : np.array
            ``array`` divided by its peak absolute value. An all-zero or
            empty input is only cast, never divided.
        """
        array = np.asarray(array)
        if array.dtype == type or np.issubdtype(array.dtype, np.floating):
            return array

        # Cast first: np.abs on a signed integer array wraps around for the
        # most negative value (e.g. -32768 for int16), which corrupts the peak.
        converted = array.astype(type)
        if converted.size == 0:
            return converted

        peak = np.max(np.abs(converted))
        if peak == 0:
            return converted

        return converted / peak

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Convert a float np.array into an integer array using the full range
        of the target dtype.

        Parameters
        ----------
        array: np.array
            Signal to convert; it is never modified in place. Arrays that
            already have an integer dtype are returned unchanged.
        type: np.int16
            Integer dtype of the result.
        divide_max_abs: bool
            If True the signal is first normalized by its peak absolute
            value. If False the signal is scaled as is and values outside
            [-1, 1] are clipped to the limits of ``type``.

        Returns
        -------
        result : np.array
        """
        array = np.asarray(array)
        if array.dtype == type or np.issubdtype(array.dtype, np.integer):
            return array

        if array.size == 0:
            return array.astype(type)

        limits = np.iinfo(type)
        peak = np.max(np.abs(array))
        if divide_max_abs and peak != 0:
            array = array / peak

        # Clip before casting so that out-of-range samples saturate instead
        # of wrapping around in the integer cast.
        scaled = np.clip(array * limits.max, limits.min, limits.max)

        return scaled.astype(type)

    @staticmethod
    def remove_silence(y, threshold=0.005, pause_length=200, keep_at_start_and_end=50):
        """
        Shorten long quiet passages of a one-dimensional signal.

        A quiet passage is a run of consecutive samples whose absolute value
        is below ``threshold``. Runs longer than ``pause_length`` are cut
        down to their first and last ``keep_at_start_and_end`` samples.

        Parameters
        ----------
        y: np.array
            One-dimensional signal.
        threshold: float
            Amplitude below which a sample counts as quiet.
        pause_length: int
            Run length, counted in samples (not milliseconds), that a quiet
            run has to exceed before it is shortened.
        keep_at_start_and_end: int
            Number of quiet samples kept at each end of a shortened run.

        Returns
        -------
        result : np.array
            Copy of ``y`` without the removed samples.

        Raises
        ------
        ValueError
            If ``keep_at_start_and_end`` is negative.
        """
        if keep_at_start_and_end < 0:
            raise ValueError("keep_at_start_and_end must not be negative.")

        y = np.asarray(y)
        quiet = np.abs(y) < threshold

        # Pad with False on both sides so runs touching either end of the
        # signal still produce a rising and a falling edge.
        padded = np.concatenate(([False], quiet, [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_ends = np.flatnonzero(edges == -1)  # exclusive end index

        keep_mask = np.ones(y.shape[0], dtype=bool)
        for start, end in zip(run_starts, run_ends):
            if end - start > pause_length:
                keep_mask[start + keep_at_start_and_end:end - keep_at_start_and_end] = False

        return y[keep_mask]

    @staticmethod
    def remove_noise(y, sr):
        """
        Reduce noise in a signal with ``noisereduce``.

        Parameters
        ----------
        y: np.array
            Signal to clean.
        sr: int
            Sampling rate of ``y``.

        Returns
        -------
        result : np.array
        """
        # prop_decrease=0.8 reduces the noise only partially; the sound
        # quality is better than with a full reduction at 1.0.
        return nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)

    @staticmethod
    def create_frames(y, frame_size, overlap):
        """
        Split a signal into equally sized, optionally overlapping frames.

        Parameters
        ----------
        y: np.array
            Signal to split.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by two consecutive frames.

        Returns
        -------
        result : list of np.array
            All complete frames, including one that ends exactly on the last
            sample. Trailing samples that do not fill a frame are dropped.
            The list is empty when the parameters are invalid
            (``frame_size <= 0``, ``overlap < 0`` or ``overlap >= frame_size``).
        """
        if frame_size <= 0 or overlap < 0 or overlap >= frame_size:
            return []

        hop = frame_size - overlap
        last_start = y.shape[0] - frame_size

        return [y[start:start + frame_size] for start in range(0, last_start + 1, hop)]

    @staticmethod
    def window_frames(frames, window_function=np.hanning):
        """
        Multiply every frame with a window of matching length.

        Parameters
        ----------
        frames: list of np.array
            Frames to window.
        window_function: callable
            Called with the frame length, returns the window to apply.

        Returns
        -------
        result : list of np.array
        """
        return [frame * window_function(frame.shape[0]) for frame in frames]

    @staticmethod
    def load_preprocessed_frames(filepath=None, y=None, sr=None, frame_size=1000, overlap=100):
        """
        Run the whole preprocessing pipeline and return windowed frames.

        Parameters
        ----------
        filepath: str
            Audio file to load with ``Utils.load_file``. It is read whenever
            ``y`` or ``sr`` is missing.
        y: np.array
            Already loaded signal; only used together with ``sr``.
        sr: int
            Sampling rate of ``y``.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by two consecutive frames.

        Returns
        -------
        result : list of np.array

        Raises
        ------
        ValueError
            If neither ``filepath`` nor both ``y`` and ``sr`` are given.
        """
        signal_missing = y is None or sr is None
        if filepath is None and signal_missing:
            raise ValueError("Either filepath or y and sr must be given.")

        if signal_missing:
            y, sr = Utils.load_file(filepath)

        y = AudioPreprocessor.remove_noise(y=y, sr=sr)
        y = AudioPreprocessor.remove_silence(y=y)

        frames = AudioPreprocessor.create_frames(y=y, frame_size=frame_size, overlap=overlap)

        return AudioPreprocessor.window_frames(frames=frames)
+
def main():
    """Preprocess ``./audio.wav`` and print the resulting windowed frames."""
    print(AudioPreprocessor.load_preprocessed_frames("./audio.wav"))


# Only run the demo when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,6 @@
+import librosa
+
class Utils:
    """Namespace for small stateless helper functions."""

    @staticmethod
    def load_file(file_path):
        """Load an audio file with ``librosa.load`` and return its result.

        Callers in this project unpack the result as ``(y, sr)``.
        """
        loaded = librosa.load(file_path)
        return loaded