Binary file added data/mnist.pkl.gz
85 changes: 85 additions & 0 deletions nn/mnist_loader.py
@@ -0,0 +1,85 @@
"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import pickle
import gzip

# Third-party libraries
import numpy as np

def load_data():
"""Return the MNIST data as a tuple containing the training data,
the validation data, and the test data.

The ``training_data`` is returned as a tuple with two entries.
The first entry contains the actual training images. This is a
numpy ndarray with 50,000 entries. Each entry is, in turn, a
numpy ndarray with 784 values, representing the 28 * 28 = 784
pixels in a single MNIST image.

The second entry in the ``training_data`` tuple is a numpy ndarray
containing 50,000 entries. Those entries are just the digit
values (0...9) for the corresponding images contained in the first
entry of the tuple.

The ``validation_data`` and ``test_data`` are similar, except
each contains only 10,000 images.

This is a nice data format, but for use in neural networks it's
helpful to modify the format of the ``training_data`` a little.
That's done in the wrapper function ``load_data_wrapper()``, see
below.
"""
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    # The pickle was written by Python 2, so Python 3 needs the latin1
    # encoding to restore the numpy arrays correctly.
    training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    f.close()
return (training_data, validation_data, test_data)
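
# A quick shape check (a sketch; it assumes ../data/mnist.pkl.gz is in
# place, as in the repository layout above):
#
#   tr_d, va_d, te_d = load_data()
#   tr_d[0].shape   # (50000, 784)
#   tr_d[1].shape   # (50000,)
#   va_d[0].shape   # (10000, 784), and the same for te_d[0]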

def load_data_wrapper():
"""Return a tuple containing ``(training_data, validation_data,
test_data)``. Based on ``load_data``, but the format is more
convenient for use in our implementation of neural networks.

In particular, ``training_data`` is a list containing 50,000
2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
containing the input image. ``y`` is a 10-dimensional
numpy.ndarray representing the unit vector corresponding to the
correct digit for ``x``.

``validation_data`` and ``test_data`` are lists containing 10,000
2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
corresponding classification, i.e., the digit values (integers)
corresponding to ``x``.

Obviously, this means we're using slightly different formats for
the training data and the validation / test data. These formats
turn out to be the most convenient for use in our neural network
code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    # Wrap each zip in list() so the data can be shuffled and re-used
    # across epochs (in Python 3, zip returns a one-shot iterator).
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)
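
# After wrapping, each training example is a pair of column vectors
# (a small illustration, not new functionality):
#
#   training_data[0][0].shape   # (784, 1) -- the image
#   training_data[0][1].shape   # (10, 1)  -- the one-hot label
#
# whereas test_data[0][1] is just the integer digit label.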

def vectorized_result(j):
"""Return a 10-dimensional unit vector with a 1.0 in the jth
position and zeroes elsewhere. This is used to convert a digit
(0...9) into a corresponding desired output from the neural
network."""
e = np.zeros((10, 1))
e[j] = 1.0
return e
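
# For example, vectorized_result(2) returns the (10, 1) column vector
#
#   array([[0.], [0.], [1.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]])
#
# i.e. the one-hot encoding the network is trained to produce for the
# digit 2.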
111 changes: 111 additions & 0 deletions nn/simplenn.py
@@ -0,0 +1,111 @@
import random

import numpy as np

import mnist_loader

class SimpleNN:

    def __init__(self, sizes):
        # sizes is a list with the number of units in each layer:
        # [2, 3, 1] means 2 neurons in the input layer, 3 in the hidden
        # layer and 1 in the output layer.
        self.num_layers = len(sizes)
        self.sizes = sizes
        # The slice sizes[1:] takes all elements of sizes from index 1
        # (the second position) onward, so the input layer gets no biases.
        # np.random.randn(rows, cols) returns a matrix of random elements:
        # np.random.randn(2, 1) =>
        #   array([[ 0.68265325],
        #          [-0.52939261]])
        # biases holds one column vector per non-input layer.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # zip pairs each layer size with the next: x ranges over
        # sizes[:-1] (all elements up to, but excluding, the last) and y
        # over sizes[1:] (all elements from the second onward).
        # For [2, 3, 1] this produces:
        #   * a 3x2 matrix -- multiplied by the inputs
        #   * a 1x3 matrix -- multiplies the hidden layer to produce the output
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        # Propagate a through every layer: a' = sigmoid(w . a + b).
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a
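
    # A minimal usage sketch (toy numbers, not MNIST data):
    #
    #   net = SimpleNN([2, 3, 1])
    #   out = net.feedforward(np.array([[0.5], [-0.3]]))   # shape (1, 1)
    #
    # a must be a column vector with sizes[0] rows; each loop iteration
    # replaces it with the next layer's activations.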

    def separate_batches(self, training_data, batch_size):
        random.shuffle(training_data)
        n = len(training_data)
        # Extract chunks from the shuffled training set: range yields
        # indices from 0 up to n with a step size of batch_size, so each
        # chunk holds batch_size consecutive examples.
        return [training_data[i:i + batch_size] for i in range(0, n, batch_size)]
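
    # For example, six samples with batch_size = 2 give three chunks
    # (after shuffling):
    #
    #   [d0, d1, d2, d3, d4, d5] -> [[d0, d1], [d2, d3], [d4, d5]]
    #
    # When len(training_data) is not a multiple of batch_size, the last
    # chunk is simply shorter.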

    def update_batches(self, batches, alpha):
        for batch in batches:
            nabla_b = [np.zeros(b.shape) for b in self.biases]
            nabla_w = [np.zeros(w.shape) for w in self.weights]

            m = len(batch)

            # x is a (784, 1) column vector holding one image; y is the
            # (10, 1) one-hot vector produced by vectorized_result.
            for x, y in batch:
                delta_b, delta_w = self.backpropagation(x, y)
                nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_b)]
                nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_w)]

            # One gradient-descent step, averaging the gradients over the
            # mini-batch: w -> w - (alpha / m) * nabla_w, likewise for b.
            self.weights = [w - (alpha / m) * nw for w, nw in zip(self.weights, nabla_w)]
            self.biases = [b - (alpha / m) * nb for b, nb in zip(self.biases, nabla_b)]


def backpropagation(self, x, y):
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]

activation = x
activations = [x]
zs = []
        for b, w in zip(self.biases, self.weights):
            # b and w for the current layer
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
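        # The backward pass below implements the standard backpropagation
        # equations (* is the elementwise product):
        #   delta^L = (a^L - y) * sigmoid'(z^L)                (output layer)
        #   delta^l = (w^{l+1}.T  delta^{l+1}) * sigmoid'(z^l)
        #   dC/db^l = delta^l
        #   dC/dw^l = delta^l . (a^{l-1}).T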
        # backward pass: error (delta) for the output layer
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Propagate the error backwards through the remaining layers;
        # l counts from the back, so l = 2 is the second-to-last layer.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def SGD(self, training_data, epochs, batch_size, alpha, test_data):
        n_test = len(test_data)

        for epoch in range(epochs):
            batches = self.separate_batches(training_data, batch_size)
            self.update_batches(batches, alpha)

            print("Epoch {0}: {1} / {2}".format(epoch, self.evaluate(test_data), n_test))

    def evaluate(self, test_data):
        # np.argmax picks the output neuron with the highest activation;
        # in test_data, y is the integer digit label, so the comparison
        # below is a direct match.
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        # Gradient of the quadratic cost C = 0.5 * ||a - y||^2 with
        # respect to the output activations.
        return (output_activations - y)

def sigmoid(z):
return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
return sigmoid(z)*(1-sigmoid(z))
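
# Optional sanity check (not part of the original code): a minimal
# finite-difference sketch that compares one backpropagation gradient
# against a numerical estimate, under the quadratic cost
# C = 0.5 * ||a - y||^2 implied by cost_derivative above. The function
# name and the eps value are this sketch's own choices.
def gradient_check(net, x, y, eps=1e-5):
    def cost():
        return 0.5 * np.sum((net.feedforward(x) - y) ** 2)

    _, nabla_w = net.backpropagation(x, y)
    # Perturb a single weight in both directions and compare the slope.
    old = net.weights[0][0, 0]
    net.weights[0][0, 0] = old + eps
    cost_plus = cost()
    net.weights[0][0, 0] = old - eps
    cost_minus = cost()
    net.weights[0][0, 0] = old  # restore the weight
    numeric = (cost_plus - cost_minus) / (2 * eps)
    # Called with a training pair from load_data_wrapper (y one-hot),
    # this should return a value close to zero.
    return abs(numeric - nabla_w[0][0, 0])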


if __name__ == "__main__":
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

    net = SimpleNN([784, 30, 10])
    net.SGD(training_data, 30, 10, 3.0, test_data)
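    # With these hyperparameters (30 hidden neurons, 30 epochs,
    # mini-batches of 10, alpha = 3.0) a network like this typically
    # classifies roughly 95% of the 10,000 test images correctly,
    # though the exact count varies with the random initialization.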