
Commit 084c8d0

many folks recommend applying relu before batchnorm
1 parent bd71aa4 commit 084c8d0

1 file changed: +10 additions, −5 deletions

assignment2/nn/cnn.py

Lines changed: 10 additions & 5 deletions
@@ -10,15 +10,20 @@ def cnn(idx, X, filters, kernel_size, is_training,
     bconv = tf.get_variable(f'bconv{idx}', shape=[filters])
     out = tf.nn.conv2d(X, filter=Wconv, strides=strides, padding=padding) + bconv

-    # Spatial Batch Normalization Layer (trainable parameters, with scale and centering)
-    # axis=3 channel axis
-    if use_batchnorm:
-        out = tf.layers.batch_normalization(out, axis=3, training=is_training)
-
     if dropout is not None:
         out = tf.layers.dropout(out, rate=dropout, training=is_training)

     # ReLU Activation Layer
     out = tf.nn.relu(out)

+    # It is actually an interesting question where it is better to place batchnorm:
+    # before the activation (as in the original paper)
+    # or after the activation - these days many folks suggest
+    # that applying it after the activation works better
+    #
+    # Spatial Batch Normalization Layer (trainable parameters, with scale and centering)
+    # axis=3 channel axis
+    if use_batchnorm:
+        out = tf.layers.batch_normalization(out, axis=3, training=is_training)
+
     return out, [Wconv, bconv]
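
For context, the two placements discussed in the commit can be sketched side by side. This is an illustrative sketch only, not part of the commit; it assumes the same TF 1.x-style API used in cnn.py (tf.layers.batch_normalization, tf.nn.relu), and the helper name conv_block is hypothetical.

import tensorflow as tf

def conv_block(x, filters, is_training, bn_before_relu=True):
    # Hypothetical helper contrasting the two batchnorm placements.
    out = tf.layers.conv2d(x, filters=filters, kernel_size=3, padding='same')
    if bn_before_relu:
        # Placement from the original batch normalization paper: conv -> batchnorm -> relu
        out = tf.layers.batch_normalization(out, axis=3, training=is_training)
        out = tf.nn.relu(out)
    else:
        # Placement many practitioners suggest instead: conv -> relu -> batchnorm
        out = tf.nn.relu(out)
        out = tf.layers.batch_normalization(out, axis=3, training=is_training)
    return out

Either way, with tf.layers.batch_normalization in graph mode the moving-average update ops collected under tf.GraphKeys.UPDATE_OPS still need to be run alongside the train op.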
