|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | | - "execution_count": 1, |
| 5 | + "execution_count": 2, |
6 | 6 | "metadata": {}, |
7 | 7 | "outputs": [], |
8 | 8 | "source": [ |
|
16 | 16 | }, |
17 | 17 | { |
18 | 18 | "cell_type": "code", |
19 | | - "execution_count": 52, |
| 19 | + "execution_count": 20, |
20 | 20 | "metadata": {}, |
21 | 21 | "outputs": [], |
22 | 22 | "source": [ |
|
26 | 26 | " return a[p], b[p]\n", |
27 | 27 | " \n", |
28 | 28 | "def get_data_set(count, speakers):\n", |
29 | | - " third = int(count/speakers)\n", |
| 29 | + " nn_input_chunks_per_speaker = int(count/speakers)\n", |
30 | 30 | " X = np.zeros((count, 12*20))\n", |
31 | 31 | " y = np.zeros(count, dtype='uint8')\n", |
32 | 32 | " \n", |
33 | | - " coefficients_per_speaker = third * 20\n", |
| 33 | + " frames_per_speaker = nn_input_chunks_per_speaker * 20\n", |
34 | 34 | " \n", |
35 | 35 | " all_speakers = []\n", |
| 36 | + " \n", |
36 | 37 | " for i in range(0, speakers):\n", |
37 | 38 | " all_speakers.append([])\n", |
38 | 39 | " index = 0\n", |
39 | | - " while (len(all_speakers[i]) < coefficients_per_speaker):\n", |
| 40 | + " while (len(all_speakers[i]) < frames_per_speaker):\n", |
40 | 41 | " print(index, end=\"\\r\")\n", |
41 | | - " y_, sr = Utils.load_file(f\"C:\\\\Users\\\\SCU8BH\\\\Documents\\\\T3000\\\\Studienarbeit\\\\Data\\\\50_speakers_audio_data\\\\Speaker{i+30:04}\\\\Speaker{i+30:02}_{index:03}.wav\")\n", |
| 42 | + " y_, sr = Utils.load_file(f\"/home/henry/Downloads/archive/50_speakers_audio_data/Speaker_{10+i:04}/Speaker_{10+i:04}_{index:05}.wav\")\n", |
42 | 43 | " \n", |
43 | 44 | " y_ = AudioPreprocessor.remove_noise(y=y_, sr=sr)\n", |
44 | 45 | " y_ = AudioPreprocessor.remove_silence(y=y_)\n", |
|
54 | 55 | " print()\n", |
55 | 56 | " \n", |
56 | 57 | " for i in range(0, speakers):\n", |
57 | | - " for j in range(0, third):\n", |
58 | | - " X[i*third + j] = np.concatenate((all_speakers[i][20*j][1:13], \n", |
| 58 | + " for j in range(0, nn_input_chunks_per_speaker):\n", |
| 59 | + " X[i*nn_input_chunks_per_speaker + j] = np.concatenate((all_speakers[i][20*j][1:13], \n", |
59 | 60 | " all_speakers[i][20*j+1][1:13], \n", |
60 | 61 | " all_speakers[i][20*j+2][1:13],\n", |
61 | 62 | " all_speakers[i][20*j+3][1:13],\n", |
|
76 | 77 | " all_speakers[i][20*j+18][1:13],\n", |
77 | 78 | " all_speakers[i][20*j+19][1:13]\n", |
78 | 79 | " ))\n", |
79 | | - " y[i*third + j] = i\n", |
| 80 | + " y[i*nn_input_chunks_per_speaker + j] = i\n", |
80 | 81 | " \n", |
81 | 82 | " return X, y" |
82 | 83 | ] |
83 | 84 | }, |
84 | 85 | { |
85 | 86 | "cell_type": "code", |
86 | | - "execution_count": 53, |
| 87 | + "execution_count": 21, |
87 | 88 | "metadata": {}, |
88 | 89 | "outputs": [ |
89 | 90 | { |
90 | 91 | "name": "stdout", |
91 | 92 | "output_type": "stream", |
92 | 93 | "text": [ |
93 | 94 | "8\n", |
94 | | - "11\n", |
| 95 | + "9\n", |
95 | 96 | "9\n", |
96 | 97 | "10\n", |
97 | | - "10\n" |
| 98 | + "9\n" |
98 | 99 | ] |
99 | 100 | } |
100 | 101 | ], |
|
106 | 107 | }, |
107 | 108 | { |
108 | 109 | "cell_type": "code", |
109 | | - "execution_count": 63, |
| 110 | + "execution_count": 28, |
110 | 111 | "metadata": {}, |
111 | 112 | "outputs": [ |
112 | 113 | { |
113 | 114 | "name": "stdout", |
114 | 115 | "output_type": "stream", |
115 | 116 | "text": [ |
116 | 117 | "[0 0 0 ... 4 4 4]\n", |
117 | | - "[4 2 3 ... 2 2 4]\n", |
118 | | - "29/29 [==============================] - 0s 1ms/step - loss: 2.1533e-05 - accuracy: 1.0000\n", |
119 | | - "Test accuracy: 1.0\n", |
120 | | - "Test loss: 2.1533451217692345e-05\n", |
121 | | - "4/4 [==============================] - 0s 1ms/step\n", |
122 | | - "[4 2 2 2 2 2 2 2 2 2 2 2 1 4 2 2 1 1 1 1 2 2 4 2 1 2 2 2 2 2 2 4 2 2 2 2 2\n", |
123 | | - " 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 4 1 2 2 2 2 2 1 2 3 1 1 2 2 2 4 2 4 2 2 2\n", |
124 | | - " 2 2 2 2 4 1 0 2 4 2 4 2 4 2 1 2 4 2 3 3 2 2 2 2 2 2 2 4 3 4 1 0 2 1 2 2 4\n", |
125 | | - " 2 2 4 2 2 0 0 0]\n", |
126 | | - "6\n", |
127 | | - "14\n", |
128 | | - "79\n", |
129 | | - "4\n", |
130 | | - "16\n" |
| 118 | + "[2 3 2 ... 0 4 2]\n", |
| 119 | + "29/29 [==============================] - 0s 1ms/step - loss: 0.8353 - accuracy: 0.6485\n", |
| 120 | + "Test accuracy: 0.6484715938568115\n", |
| 121 | + "Test loss: 0.8353310227394104\n", |
| 122 | + "4/4 [==============================] - 0s 2ms/step\n", |
| 123 | + "[2 4 2 4 4 4 4 4 2 4 0 2 2 2 1 2 2 4 2 3 4 4 3 4 2 2 3 3 2 2 2 4 4 2 3 4 4\n", |
| 124 | + " 0 4 0 2 4 2 4 4 4 4 4 2 3 0 2 2 4 2 2 4 2 0 2 4 2 2 4 4 2 0 4 2 2 4 4 2 2\n", |
| 125 | + " 3 0 2 4 3 2 2 2 4 2 2 0 4 0 3 4 3 2 2 0 4 2 0 2 2 4 3 2 2 4 2 0 2 2 2 4 4\n", |
| 126 | + " 2 2 2 2 2 2 4]\n", |
| 127 | + "12\n", |
| 128 | + "1\n", |
| 129 | + "54\n", |
| 130 | + "11\n", |
| 131 | + "40\n" |
131 | 132 | ] |
132 | 133 | } |
133 | 134 | ], |
|
154 | 155 | " print(f\"Test loss: {test_loss}\")\n", |
155 | 156 | " \n", |
156 | 157 | " \n", |
157 | | - " y_, sr = Utils.load_file(f\"C:\\\\Users\\\\SCU8BH\\\\Documents\\\\T3000\\\\Studienarbeit\\\\Data\\\\50_speakers_audio_data\\\\Speaker0032\\\\Speaker32_012.wav\")\n", |
| 158 | + " y_, sr = Utils.load_file(f\"/home/henry/Downloads/archive/50_speakers_audio_data/Speaker_0014/Speaker_0014_00020.wav\")\n", |
158 | 159 | " \n", |
159 | 160 | " y_ = AudioPreprocessor.remove_noise(y=y_, sr=sr)\n", |
160 | 161 | " y_ = AudioPreprocessor.remove_silence(y=y_)\n", |
|
207 | 208 | ], |
208 | 209 | "metadata": { |
209 | 210 | "kernelspec": { |
210 | | - "display_name": "Python 3.10.4 64-bit", |
| 211 | + "display_name": "Python 3", |
211 | 212 | "language": "python", |
212 | 213 | "name": "python3" |
213 | 214 | }, |
|
221 | 222 | "name": "python", |
222 | 223 | "nbconvert_exporter": "python", |
223 | 224 | "pygments_lexer": "ipython3", |
224 | | - "version": "3.10.4" |
| 225 | + "version": "3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]" |
225 | 226 | }, |
226 | 227 | "orig_nbformat": 4, |
227 | 228 | "vscode": { |
228 | 229 | "interpreter": { |
229 | | - "hash": "2fc4d7ba6602d69fe52dcf13f0361bb9556610661c910f56182baab83bdef03f" |
| 230 | + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" |
230 | 231 | } |
231 | 232 | } |
232 | 233 | }, |
|
0 commit comments