
Commit 5e7af83

GregorR authored and jmvalin committed
Neural network model files
Extending the neural network dumper to dump to a simple text file format, and adding reader functions to read a neural network description from a FILE *.
1 parent f30741b · commit 5e7af83

File tree

4 files changed: +200 −10 lines changed


Makefile.am

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ librnnoise_la_SOURCES = \
 	src/denoise.c \
 	src/rnn.c \
 	src/rnn_data.c \
+	src/rnn_reader.c \
 	src/pitch.c \
 	src/kiss_fft.c \
 	src/celt_lpc.c

include/rnnoise.h

Lines changed: 7 additions & 1 deletion
@@ -28,6 +28,9 @@
 #ifndef RNNOISE_H
 #define RNNOISE_H 1
 
+#include <stdio.h>
+
+
 #ifndef RNNOISE_EXPORT
 # if defined(WIN32)
 #  if defined(RNNOISE_BUILD) && defined(DLL_EXPORT)
@@ -42,7 +45,6 @@
 # endif
 #endif
 
-
 typedef struct DenoiseState DenoiseState;
 typedef struct RNNModel RNNModel;
 
@@ -56,4 +58,8 @@ RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);
 
 RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);
 
+RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f);
+
+RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model);
+
 #endif
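The two new entry points let an application load a model at run time instead of using the built-in weights. A minimal sketch of the intended call sequence, assuming rnnoise_create() accepts the RNNModel pointer (its signature is not shown in this hunk) and using a hypothetical model file name:

    #include <stdio.h>
    #include "rnnoise.h"

    int main(void) {
        /* "model.txt" is a hypothetical file written by training/dump_rnn.py */
        FILE *mf = fopen("model.txt", "r");
        if (!mf) return 1;
        RNNModel *model = rnnoise_model_from_file(mf);
        fclose(mf);                    /* the reader does not close the stream */
        if (!model) return 1;
        DenoiseState *st = rnnoise_create(model);
        /* ... run 480-sample frames through rnnoise_process_frame(st, out, in) ... */
        rnnoise_destroy(st);
        rnnoise_model_free(model);     /* free only after destroying states that use it */
        return 0;
    }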

src/rnn_reader.c

Lines changed: 168 additions & 0 deletions
/* Copyright (c) 2018 Gregor Richards */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#include "rnn.h"
#include "rnn_data.h"
#include "rnnoise.h"

/* Although these values are the same as in rnn.h, we make them separate to
 * avoid accidentally burning internal values into a file format */
#define F_ACTIVATION_TANH    0
#define F_ACTIVATION_SIGMOID 1
#define F_ACTIVATION_RELU    2

RNNModel *rnnoise_model_from_file(FILE *f)
{
    int i, in;

    if (fscanf(f, "rnnoise-nu model file version %d\n", &in) != 1 || in != 1)
        return NULL;

    RNNModel *ret = calloc(1, sizeof(RNNModel));
    if (!ret)
        return NULL;

#define ALLOC_LAYER(type, name) \
    type *name; \
    name = calloc(1, sizeof(type)); \
    if (!name) { \
        rnnoise_model_free(ret); \
        return NULL; \
    } \
    ret->name = name

    ALLOC_LAYER(DenseLayer, input_dense);
    ALLOC_LAYER(GRULayer, vad_gru);
    ALLOC_LAYER(GRULayer, noise_gru);
    ALLOC_LAYER(GRULayer, denoise_gru);
    ALLOC_LAYER(DenseLayer, denoise_output);
    ALLOC_LAYER(DenseLayer, vad_output);

#define INPUT_VAL(name) do { \
    if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \
        rnnoise_model_free(ret); \
        return NULL; \
    } \
    name = in; \
    } while (0)

#define INPUT_ACTIVATION(name) do { \
    int activation; \
    INPUT_VAL(activation); \
    switch (activation) { \
        case F_ACTIVATION_SIGMOID: \
            name = ACTIVATION_SIGMOID; \
            break; \
        case F_ACTIVATION_RELU: \
            name = ACTIVATION_RELU; \
            break; \
        default: \
            name = ACTIVATION_TANH; \
    } \
    } while (0)

#define INPUT_ARRAY(name, len) do { \
    rnn_weight *values = malloc((len) * sizeof(rnn_weight)); \
    if (!values) { \
        rnnoise_model_free(ret); \
        return NULL; \
    } \
    name = values; \
    for (i = 0; i < (len); i++) { \
        if (fscanf(f, "%d", &in) != 1) { \
            rnnoise_model_free(ret); \
            return NULL; \
        } \
        values[i] = in; \
    } \
    } while (0)

#define INPUT_DENSE(name) do { \
    INPUT_VAL(name->nb_inputs); \
    INPUT_VAL(name->nb_neurons); \
    ret->name ## _size = name->nb_neurons; \
    INPUT_ACTIVATION(name->activation); \
    INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \
    INPUT_ARRAY(name->bias, name->nb_neurons); \
    } while (0)

#define INPUT_GRU(name) do { \
    INPUT_VAL(name->nb_inputs); \
    INPUT_VAL(name->nb_neurons); \
    ret->name ## _size = name->nb_neurons; \
    INPUT_ACTIVATION(name->activation); \
    INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons * 3); \
    INPUT_ARRAY(name->recurrent_weights, name->nb_neurons * name->nb_neurons * 3); \
    INPUT_ARRAY(name->bias, name->nb_neurons * 3); \
    } while (0)

    INPUT_DENSE(input_dense);
    INPUT_GRU(vad_gru);
    INPUT_GRU(noise_gru);
    INPUT_GRU(denoise_gru);
    INPUT_DENSE(denoise_output);
    INPUT_DENSE(vad_output);

    return ret;
}

void rnnoise_model_free(RNNModel *model)
{
#define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0)
#define FREE_DENSE(name) do { \
    if (model->name) { \
        free((void *) model->name->input_weights); \
        free((void *) model->name->bias); \
        free((void *) model->name); \
    } \
    } while (0)
#define FREE_GRU(name) do { \
    if (model->name) { \
        free((void *) model->name->input_weights); \
        free((void *) model->name->recurrent_weights); \
        free((void *) model->name->bias); \
        free((void *) model->name); \
    } \
    } while (0)

    if (!model)
        return;
    FREE_DENSE(input_dense);
    FREE_GRU(vad_gru);
    FREE_GRU(noise_gru);
    FREE_GRU(denoise_gru);
    FREE_DENSE(denoise_output);
    FREE_DENSE(vad_output);
    free(model);
}
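Taken together, the INPUT_* macros fix the text format rnnoise_model_from_file() expects: a version line, then the six layers in the order input_dense, vad_gru, noise_gru, denoise_gru, denoise_output, vad_output, each contributing its input count, neuron count (both capped at 128 by INPUT_VAL), and activation code, followed by its weight arrays as whitespace-separated integers. An illustrative fragment with invented dimensions (the parenthetical annotations are not part of the file):

    rnnoise-nu model file version 1
    42 24 0
    -10 3 127 -60 ...   (the 42*24 input weights of input_dense)
    5 -2 0 ...          (the 24 biases)

Keeping the F_ACTIVATION_* codes distinct from the internal ACTIVATION_* constants means the file format stays stable even if the enum values in rnn.h ever change.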

training/dump_rnn.py

Lines changed: 24 additions & 9 deletions
@@ -12,32 +12,45 @@
 import re
 import numpy as np
 
-def printVector(f, vector, name):
+def printVector(f, ft, vector, name):
     v = np.reshape(vector, (-1));
     #print('static const float ', name, '[', len(v), '] = \n', file=f)
     f.write('static const rnn_weight {}[{}] = {{\n   '.format(name, len(v)))
     for i in range(0, len(v)):
         f.write('{}'.format(min(127, int(round(256*v[i])))))
+        ft.write('{}'.format(min(127, int(round(256*v[i])))))
         if (i!=len(v)-1):
             f.write(',')
         else:
             break;
+        ft.write(" ")
         if (i%8==7):
             f.write("\n   ")
         else:
             f.write(" ")
     #print(v, file=f)
     f.write('\n};\n\n')
+    ft.write("\n")
     return;
 
-def printLayer(f, layer):
+def printLayer(f, ft, layer):
     weights = layer.get_weights()
-    printVector(f, weights[0], layer.name + '_weights')
+    activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()
     if len(weights) > 2:
-        printVector(f, weights[1], layer.name + '_recurrent_weights')
-    printVector(f, weights[-1], layer.name + '_bias')
+        ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]/3))
+    else:
+        ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]))
+    if activation == 'SIGMOID':
+        ft.write('1\n')
+    elif activation == 'RELU':
+        ft.write('2\n')
+    else:
+        ft.write('0\n')
+    printVector(f, ft, weights[0], layer.name + '_weights')
+    if len(weights) > 2:
+        printVector(f, ft, weights[1], layer.name + '_recurrent_weights')
+    printVector(f, ft, weights[-1], layer.name + '_bias')
     name = layer.name
-    activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()
     if len(weights) > 2:
         f.write('static const GRULayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
                 .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]/3, activation))
@@ -67,18 +80,20 @@ def mean_squared_sqrt_error(y_true, y_pred):
 weights = model.get_weights()
 
 f = open(sys.argv[2], 'w')
+ft = open(sys.argv[3], 'w')
 
 f.write('/*This file is automatically generated from a Keras model*/\n\n')
-f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n')
+f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n#include "rnn_data.h"\n\n')
+ft.write('rnnoise-nu model file version 1\n')
 
 layer_list = []
 for i, layer in enumerate(model.layers):
     if len(layer.get_weights()) > 0:
-        printLayer(f, layer)
+        printLayer(f, ft, layer)
     if len(layer.get_weights()) > 2:
         layer_list.append(layer.name)
 
-f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[3]))
+f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[4]))
 for i, layer in enumerate(model.layers):
     if len(layer.get_weights()) > 0:
         structLayer(f, layer)
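With the second output stream, the dumper's argument list shifts by one: sys.argv[2] is the generated C source, sys.argv[3] the new text model file, and sys.argv[4] the model name embedded in the RNNModel struct (previously sys.argv[3]); sys.argv[1] remains the trained Keras weights file loaded earlier in the script (not shown in this hunk). A plausible invocation, with file names that are only illustrative:

    python dump_rnn.py weights.hdf5 rnn_data.c rnn_data.txt orig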
