@@ -483,6 +483,10 @@ def conv_backward_naive(dout, cache):
483483 Inputs:
484484 - dout: Upstream derivatives.
485485 - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive
486+
487+ Some helpful explanations can be found here:
488+ - https://medium.com/@2017csm1006/forward-and-backpropagation-in-convolutional-neural-network-4dfa96d7b37e
489+ - https://becominghuman.ai/back-propagation-in-convolutional-neural-networks-intuition-and-code-714ef1c38199
486490
487491 Returns a tuple of:
488492 - dx: Gradient with respect to x
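The articles linked in the docstring describe the same naive scheme: every output element dout[n, f, i, j] scatters gradient back into the filter (dw) and the padded input (dx). As a rough standalone sketch only, not the code committed in this diff (the helper name conv_backward_sketch is made up, and the usual (x, w, b, conv_param) cache layout from conv_forward_naive is assumed):

import numpy as np

def conv_backward_sketch(dout, cache):
    x, w, b, conv_param = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_H, out_W = dout.shape
    # work on a padded copy of x so dx can be accumulated in place
    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = dout.sum(axis=(0, 2, 3))  # bias gradient is the plain sum over N, H', W'
    for n in range(N):
        for f in range(F):
            for i in range(out_H):
                for j in range(out_W):
                    hs, ws = i * stride, j * stride
                    window = x_pad[n, :, hs:hs + HH, ws:ws + WW]
                    dw[f] += window * dout[n, f, i, j]
                    dx_pad[n, :, hs:hs + HH, ws:ws + WW] += w[f] * dout[n, f, i, j]
    dx = dx_pad[:, :, pad:pad + H, pad:pad + W]  # strip the padding back off
    return dx, dw, db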
@@ -551,7 +555,20 @@ def max_pool_forward_naive(x, pool_param):
551555 ###########################################################################
552556 # TODO: Implement the max pooling forward pass #
553557 ###########################################################################
554- pass
558+ (N, C, H, W) = x.shape
559+ pool_height = pool_param['pool_height']
560+ pool_width = pool_param['pool_width']
561+ stride = pool_param['stride']
562+ out_H = 1 + (H - pool_height) // stride
563+ out_W = 1 + (W - pool_width) // stride
564+ out = np.zeros((N, C, out_H, out_W))
565+
566+ for h in range(out_H):
567+     for w in range(out_W):
568+         xx = h * stride
569+         yy = w * stride
570+         out[:, :, h, w] = np.max(x[:, :, xx:xx + pool_height, yy:yy + pool_width], axis=(2, 3))
571+
555572 ###########################################################################
556573 # END OF YOUR CODE #
557574 ###########################################################################
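As a quick shape sanity check of the pooling loop above (illustrative usage only; it assumes the function returns (out, cache) as the assignment's layer API does):

import numpy as np
x = np.random.randn(2, 3, 8, 8)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
out, cache = max_pool_forward_naive(x, pool_param)
print(out.shape)  # a 2x2 pool with stride 2 halves H and W -> (2, 3, 4, 4)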
@@ -574,7 +591,28 @@ def max_pool_backward_naive(dout, cache):
574591 ###########################################################################
575592 # TODO: Implement the max pooling backward pass #
576593 ###########################################################################
577- pass
594+ (x, pool_param) = cache
595+
596+ pool_height = pool_param['pool_height']
597+ pool_width = pool_param['pool_width']
598+ stride = pool_param['stride']
599+
600+ dx = np.zeros_like(x)
601+
602+ (N, C, H, W) = dout.shape  # H, W here are the pooled (output) spatial dims
603+ for h in range(H):
604+     for w in range(W):
605+         xx = h * stride
606+         yy = w * stride
607+         # find which entries of x contributed to this output element
608+         x_frame = x[:, :, xx:xx + pool_height, yy:yy + pool_width]
609+         # x_frame has shape (N, C, pool_height, pool_width), while
610+         # x_frame.max(axis=(2, 3)) has shape (N, C); indexing it with
611+         # [:, :, None, None] turns it into (N, C, 1, 1) so it broadcasts
612+         # against x_frame (a standalone example follows this hunk)
613+         x_mask = x_frame == x_frame.max(axis=(2, 3))[:, :, None, None]
614+         dx[:, :, xx:xx + pool_height, yy:yy + pool_width] += dout[:, :, h, w][:, :, None, None] * x_mask
615+
578616 ###########################################################################
579617 # END OF YOUR CODE #
580618 ###########################################################################
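The [:, :, None, None] broadcasting used for the mask can be seen in isolation on a tiny array (names here are illustrative, not from the diff):

import numpy as np
x_frame = np.arange(16).reshape(2, 2, 2, 2)      # pretend (N, C, pool_height, pool_width)
maxes = x_frame.max(axis=(2, 3))                 # shape (N, C)
mask = x_frame == maxes[:, :, None, None]        # maxes broadcasts as (N, C, 1, 1)
print(mask[0, 0])                                # True only at the max position of that window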
@@ -612,7 +650,11 @@ def spatial_batchnorm_forward(x, gamma, beta, bn_param):
612650 # version of batch normalization defined above. Your implementation should#
613651 # be very short; ours is less than five lines. #
614652 ###########################################################################
615- pass
653+ N, C, H, W = x.shape
654+ # move the channel axis last and flatten the other three axes, so vanilla batchnorm sees one row per (n, h, w) location (a round-trip check follows this hunk)
655+ flat_output, cache = batchnorm_forward(x.transpose(0, 2, 3, 1).reshape((N * H * W, C)), gamma, beta, bn_param)
656+ # restore the original (N, C, H, W) layout
657+ out = flat_output.reshape(N, H, W, C).transpose(0, 3, 1, 2)
616658 ###########################################################################
617659 # END OF YOUR CODE #
618660 ###########################################################################
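The transpose/reshape round-trip above is lossless; a tiny check (illustrative only):

import numpy as np
N, C, H, W = 2, 3, 4, 5
x = np.random.randn(N, C, H, W)
flat = x.transpose(0, 2, 3, 1).reshape(N * H * W, C)   # one row per (n, h, w) location, one column per channel
back = flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)  # undo the reshape, then the transpose
print(np.array_equal(back, x))                         # True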
@@ -642,7 +684,9 @@ def spatial_batchnorm_backward(dout, cache):
642684 # version of batch normalization defined above. Your implementation should#
643685 # be very short; ours is less than five lines. #
644686 ###########################################################################
645- pass
687+ N, C, H, W = dout.shape
688+ flat_dx, dgamma, dbeta = batchnorm_backward(dout.transpose(0, 2, 3, 1).reshape((N * H * W, C)), cache)
689+ dx = flat_dx.reshape(N, H, W, C).transpose(0, 3, 1, 2)
646690 ###########################################################################
647691 # END OF YOUR CODE #
648692 ###########################################################################