diff --git a/data/mnist.pkl.gz b/data/mnist.pkl.gz
new file mode 100644
index 0000000..059aba0
Binary files /dev/null and b/data/mnist.pkl.gz differ
diff --git a/nn/mnist_loader.py b/nn/mnist_loader.py
new file mode 100644
index 0000000..8ba1e83
--- /dev/null
+++ b/nn/mnist_loader.py
@@ -0,0 +1,85 @@
+"""
+mnist_loader
+~~~~~~~~~~~~
+
+A library to load the MNIST image data. For details of the data
+structures that are returned, see the doc strings for ``load_data``
+and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
+function usually called by our neural network code.
+"""
+
+#### Libraries
+# Standard library
+import _pickle as cPickle
+import gzip
+
+# Third-party libraries
+import numpy as np
+
+def load_data():
+    """Return the MNIST data as a tuple containing the training data,
+    the validation data, and the test data.
+
+    The ``training_data`` is returned as a tuple with two entries.
+    The first entry contains the actual training images. This is a
+    numpy ndarray with 50,000 entries. Each entry is, in turn, a
+    numpy ndarray with 784 values, representing the 28 * 28 = 784
+    pixels in a single MNIST image.
+
+    The second entry in the ``training_data`` tuple is a numpy ndarray
+    containing 50,000 entries. Those entries are just the digit
+    values (0...9) for the corresponding images contained in the first
+    entry of the tuple.
+
+    The ``validation_data`` and ``test_data`` are similar, except
+    each contains only 10,000 images.
+
+    This is a nice data format, but for use in neural networks it's
+    helpful to modify the format of the ``training_data`` a little.
+    That's done in the wrapper function ``load_data_wrapper()``, see
+    below.
+    """
+    f = gzip.open('../data/mnist.pkl.gz', 'rb')
+    # The pickle was written under Python 2, so it must be decoded
+    # with latin1 when loaded under Python 3.
+    training_data, validation_data, test_data = cPickle.load(f, encoding='latin1')
+    f.close()
+    return (training_data, validation_data, test_data)
+
+def load_data_wrapper():
+    """Return a tuple containing ``(training_data, validation_data,
+    test_data)``. Based on ``load_data``, but the format is more
+    convenient for use in our implementation of neural networks.
+
+    In particular, ``training_data`` is a list containing 50,000
+    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
+    containing the input image. ``y`` is a 10-dimensional
+    numpy.ndarray representing the unit vector corresponding to the
+    correct digit for ``x``.
+
+    ``validation_data`` and ``test_data`` are lists containing 10,000
+    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
+    numpy.ndarray containing the input image, and ``y`` is the
+    corresponding classification, i.e., the digit value (an integer)
+    corresponding to ``x``.
+
+    Obviously, this means we're using slightly different formats for
+    the training data and the validation / test data. These formats
+    turn out to be the most convenient for use in our neural network
+    code."""
+    tr_d, va_d, te_d = load_data()
+    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
+    training_results = [vectorized_result(y) for y in tr_d[1]]
+    # Wrap zip in list() so the training code can shuffle the data and
+    # take len() of it (zip is a one-shot iterator in Python 3).
+    training_data = list(zip(training_inputs, training_results))
+    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
+    validation_data = list(zip(validation_inputs, va_d[1]))
+    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
+    test_data = list(zip(test_inputs, te_d[1]))
+    return (training_data, validation_data, test_data)
+
+def vectorized_result(j):
+    """Return a 10-dimensional unit vector with a 1.0 in the jth
+    position and zeroes elsewhere. This is used to convert a digit
+    (0...9) into a corresponding desired output from the neural
+    network."""
+    e = np.zeros((10, 1))
+    e[j] = 1.0
+    return e
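+
+# A quick sanity check (a sketch, not part of the original loader): run this
+# module directly from the nn/ directory, so that ../data/mnist.pkl.gz
+# resolves, and confirm the shapes described in the docstrings above.
+if __name__ == "__main__":
+    training_data, validation_data, test_data = load_data_wrapper()
+    print(len(training_data), len(validation_data), len(test_data))  # 50000 10000 10000
+    x, y = training_data[0]
+    print(x.shape, y.shape)  # (784, 1) (10, 1)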
diff --git a/nn/simplenn.py b/nn/simplenn.py
new file mode 100644
index 0000000..04767db
--- /dev/null
+++ b/nn/simplenn.py
@@ -0,0 +1,111 @@
+import numpy as np
+import random
+
+class SimpleNN():
+
+    def __init__(self, sizes):
+        # sizes is a list with the number of units in each layer:
+        # [2, 3, 1] means 2 input neurons, 3 in the hidden layer and 1 output neuron.
+        self.num_layers = len(sizes)
+        self.sizes = sizes
+        # sizes[1:] gets all elements of sizes starting at index 1 (second position).
+        # np.random.randn(rows, cols) returns a matrix of random elements, e.g.
+        # np.random.randn(2, 1) =>
+        #     array([[ 0.68265325],
+        #            [-0.52939261]])
+        # biases holds one column vector per layer (no biases for the input layer).
+        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
+        # zip returns pairs in which x comes from the first list and y from the second:
+        # sizes[:-1] is every element up to the second to last,
+        # sizes[1:] is every element from the second one on.
+        # For [2, 3, 1] this gives:
+        #   * a matrix of 3 rows and 2 columns -- will be multiplied by the inputs
+        #   * a matrix of 1 row and 3 columns -- will multiply the hidden layer and produce the output
+        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
+
+    def feedforward(self, a):
+        for b, w in zip(self.biases, self.weights):
+            a = sigmoid(np.dot(w, a) + b)
+        return a
+
+    def separate_batches(self, training_data, batch_size):
+        random.shuffle(training_data)
+        n = len(training_data)
+        # Extract chunks of data from the training set: range(0, n, batch_size)
+        # yields indices starting at 0 up to n with a step of batch_size, so the
+        # result is a list of slices of the training set, each of length batch_size.
+        return [training_data[i:i + batch_size] for i in range(0, n, batch_size)]
+
+    def update_batches(self, batches, alpha):
+        for batch in batches:
+            nabla_b = [np.zeros(b.shape) for b in self.biases]
+            nabla_w = [np.zeros(w.shape) for w in self.weights]
+
+            m = len(batch)
+
+            # x is a (784, 1) array holding the pixels of one image
+            # y is a (10, 1) unit vector indicating the digit represented by x
+            for x, y in batch:
+                delta_b, delta_w = self.backpropagation(x, y)
+                nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_b)]
+                nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_w)]
+
+            # Gradient descent step: move each parameter against the averaged gradient.
+            self.weights = [w - (alpha / m) * nw for w, nw in zip(self.weights, nabla_w)]
+            self.biases = [b - (alpha / m) * nb for b, nb in zip(self.biases, nabla_b)]
+
+    def backpropagation(self, x, y):
+        nabla_b = [np.zeros(b.shape) for b in self.biases]
+        nabla_w = [np.zeros(w.shape) for w in self.weights]
+
+        # Forward pass: store the weighted input z and activation of every layer.
+        activation = x
+        activations = [x]
+        zs = []
+        for b, w in zip(self.biases, self.weights):
+            # layer-bound b and w
+            z = np.dot(w, activation) + b
+            zs.append(z)
+            activation = sigmoid(z)
+            activations.append(activation)
+        # Backward pass: error at the output layer.
+        delta = self.cost_derivative(activations[-1], y) * \
+            sigmoid_prime(zs[-1])
+        nabla_b[-1] = delta
+        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
+
+        # Propagate the error back through the remaining layers.
+        for l in range(2, self.num_layers):
+            z = zs[-l]
+            sp = sigmoid_prime(z)
+            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
+            nabla_b[-l] = delta
+            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
+        return (nabla_b, nabla_w)
+
+    def SGD(self, training_data, epochs, batch_size, alpha, test_data):
+        n_test = len(test_data)
+
+        for epoch in range(epochs):
+            batches = self.separate_batches(training_data, batch_size)
+            self.update_batches(batches, alpha)
+
+            print("Epoch {0}: {1} / {2}".format(epoch, self.evaluate(test_data), n_test))
+
+    def evaluate(self, test_data):
+        test_results = [(np.argmax(self.feedforward(x)), y)
+                        for (x, y) in test_data]
+        return sum(int(x == y) for (x, y) in test_results)
+
+    def cost_derivative(self, output_activations, y):
+        return (output_activations - y)
+
+def sigmoid(z):
+    return 1.0 / (1.0 + np.exp(-z))
+
+def sigmoid_prime(z):
+    return sigmoid(z) * (1 - sigmoid(z))
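+
+# Optional sanity check (a sketch, not part of the original network class):
+# compare the analytic gradient from backpropagation against a central
+# finite-difference estimate for a single weight, using the same quadratic
+# cost C = 0.5 * ||a - y||^2 that cost_derivative above assumes.
+def gradient_check(net, x, y, layer=0, i=0, j=0, eps=1e-5):
+    # Analytic gradient of the cost with respect to weights[layer][i, j].
+    _, nabla_w = net.backpropagation(x, y)
+    analytic = nabla_w[layer][i, j]
+
+    def cost():
+        a = net.feedforward(x)
+        return 0.5 * np.sum((a - y) ** 2)
+
+    # Numerical estimate: nudge the single weight up and down by eps.
+    original = net.weights[layer][i, j]
+    net.weights[layer][i, j] = original + eps
+    c_plus = cost()
+    net.weights[layer][i, j] = original - eps
+    c_minus = cost()
+    net.weights[layer][i, j] = original
+    numerical = (c_plus - c_minus) / (2 * eps)
+    # The two values should agree to several decimal places.
+    return analytic, numerical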
+
+if __name__ == "__main__":
+    import mnist_loader
+    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
+
+    net = SimpleNN([784, 30, 10])
+    net.SGD(training_data, 30, 10, 3.0, test_data)
\ No newline at end of file