From 5e2e339e0c3e573feab57c0f3d8157a340f1d002 Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 12:20:32 +0100 Subject: [PATCH 1/9] Create empty.gitkeep --- Kolawole_Steven/empty.gitkeep | 1 + 1 file changed, 1 insertion(+) create mode 100644 Kolawole_Steven/empty.gitkeep diff --git a/Kolawole_Steven/empty.gitkeep b/Kolawole_Steven/empty.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/Kolawole_Steven/empty.gitkeep @@ -0,0 +1 @@ + From 27653cda4265ff290e20faacca4e6d3a7354b9a2 Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:21:04 +0100 Subject: [PATCH 2/9] Add files via upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Customized Logistic Regression with Nesterov Accelerated Gradient and an Early Stopping option: Nesterov Accelerated Gradient is theorized to converge at a rate at least 10 times faster than Stochastic Gradient Descent, and over 25 times faster than naive batch gradient descent. Nesterov Gradient combines the properties of Stochastic Gradient Descent - which supposedly has properties that allow it to “jump” out of shallow local minima, giving it a better chance of finding a true global minimum - with a 'smarter' momentum that has a somewhat prescient notion of the global minimum and knows to slow down before the hill slopes up again. But there is a catch: converging too fast makes it easier for the model to overfit, which runs into the well-known bias-variance tradeoff. A way to avoid that is to introduce Early Stopping, which works by simply waiting for a certain number of epochs with no improvement in validation loss, and then stopping the iterations.
--- .../Logistic Regression from Scratch.ipynb | 322 ++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100644 Kolawole_Steven/Logistic Regression from Scratch.ipynb diff --git a/Kolawole_Steven/Logistic Regression from Scratch.ipynb b/Kolawole_Steven/Logistic Regression from Scratch.ipynb new file mode 100644 index 0000000..6031d92 --- /dev/null +++ b/Kolawole_Steven/Logistic Regression from Scratch.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logistic Regression from Scratch\n", + "## - Steven Kolawole " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Intro\n", + "\n", + "Logistic Regression is simply a Linear Regression with a Sigmoid function at its end.\n", + "\n", + "The Sigmoid function generates probability (i.e. outputs between 0 and 1) for all values of X." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "def sigmoid_fxn(x):\n", + " yhat = list(map(lambda i: 1 / (1 + np.exp(-i)), x))\n", + " return yhat\n", + "\n", + "x = np.arange(-10., 10., 0.2)\n", + "logit = sigmoid_fxn(x)\n", + "plt.title(\"The Sigmoid Function Curve\", fontsize=15)\n", + "\n", + "plt.plot(x, logit)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building my Customized Logistic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "class LogisticClassifier():\n", + " def __init__(self, \n", + " learning_rate=0.1, \n", + " tolerance=1e-4, \n", + " max_iter=1000, \n", + " batch_size=32, \n", + " momentum_decay=0.9, \n", + " early_stopping=3, \n", + " validation_set=(None,None)):\n", + " \n", + " # Gradient descent parameters\n", + " self.learning_rate = float(learning_rate)\n", + " self.tolerance = float(tolerance)\n", + " self.max_iter = int(max_iter)\n", + " self.batch_size=32\n", + " self.momentum_decay = float(momentum_decay)\n", + " self.early_stopping = int(early_stopping)\n", + " self.X_validation, self.y_validation = validation_set\n", + " \n", + " # to construct the design matrix\n", + " self.add_intercept = True\n", + " self.center = True \n", + " self.scale = True\n", + " \n", + " self.training_loss_history = []\n", + " \n", + " def __sigmoid(self, X):\n", + " return 1 / (1 + np.exp(-X))\n", + " \n", + " # z-score normalization and intercept addition\n", + " def __design_matrix(self, X):\n", + " if self.center:\n", + " X = X - self.means\n", + " if self.scale:\n", + " X = X / self.standard_error\n", + " if self.add_intercept:\n", + " intercept = np.ones((X.shape[0], 1))\n", + " X = np.hstack([intercept, X])\n", + " \n", + " return X\n", + " \n", + " def __fit_center_scale(self, X):\n", + " self.means = X.mean(axis=0)\n", + " self.standard_error = 
np.std(X, axis=0)\n", + " \n", + " def fit(self, X, y):\n", + " self.__fit_center_scale(X)\n", + "\n", + " n, k = X.shape\n", + " \n", + " # add intercept column to the design matrix\n", + " X = self.__design_matrix(X)\n", + "\n", + " # used for the convergence check\n", + " previous_loss = -float('inf')\n", + " self.converged = False\n", + " self.stopped_early = False\n", + " \n", + " # initialize parameters\n", + " self.beta = np.zeros(k + (1 if self.add_intercept else 0))\n", + " momentum = self.beta * 0 # to get the same shape and dtype as beta\n", + " \n", + " for i in range(self.max_iter):\n", + " shuffle = np.random.permutation(len(y))\n", + " X = X[shuffle, :]\n", + " y = y[shuffle]\n", + " \n", + " # we'll add one more batch, incase the batch size doesn't divide n evenly\n", + " extra = (1 if n % self.batch_size else 0)\n", + "\n", + " for batch_index in range(n // self.batch_size + extra):\n", + " batch_slice = slice(\n", + " self.batch_size * batch_index, \n", + " self.batch_size * (batch_index + 1) )\n", + " X_batch = X[batch_slice, :]\n", + " y_batch = y[batch_slice]\n", + " \n", + " beta_ahead = self.beta + self.momentum_decay * momentum\n", + " y_hat = self.__sigmoid(np.dot(X_batch, self.beta))\n", + " \n", + " # gradient descent\n", + " residuals = (y_hat - y_batch).reshape( (X_batch.shape[0], 1) )\n", + " gradient = (X_batch * residuals).mean(axis=0)\n", + " momentum = self.momentum_decay * momentum - self.learning_rate * gradient\n", + " self.beta += momentum\n", + "\n", + " # with minibatch, we only check convergence at the end of every epoch. 
\n", + " y_hat = self.__sigmoid(np.dot(X, self.beta))\n", + " self.loss = np.mean(-y * np.log(y_hat) - (1-y) * np.log(1-y_hat))\n", + " self.training_loss_history.append(self.loss)\n", + " \n", + " # early stopping\n", + " if self.check_validation_loss():\n", + " self.stopped_early = True\n", + " break \n", + " \n", + " if abs(previous_loss - self.loss) < self.tolerance:\n", + " self.converged = True\n", + " break\n", + " else:\n", + " previous_loss = self.loss\n", + " \n", + " self.iterations = i+1\n", + " \n", + " def predict_proba(self, X):\n", + " # add intercept column to the design matrix\n", + " X = self.__design_matrix(X)\n", + " return self.__sigmoid(np.dot(X, self.beta))\n", + "\n", + " def predict(self, X):\n", + " predictions = self.predict_proba(X).round()\n", + " return predictions\n", + "\n", + " def check_validation_loss(self):\n", + " # validation set loss\n", + " if not hasattr(self, 'validation_loss_history'):\n", + " self.validation_loss_history = []\n", + " p_hat = self.predict_proba(self.X_validation)\n", + " loss = np.mean(-self.y_validation * np.log(p_hat) - \\\n", + " (1-self.y_validation) * np.log(1-p_hat))\n", + " self.validation_loss_history.append(loss)\n", + " \n", + " t = self.early_stopping\n", + " if t and len(self.validation_loss_history) > t * 2:\n", + " recent_best = min(self.validation_loss_history[-t:])\n", + " previous_best = min(self.validation_loss_history[:-t])\n", + " if recent_best > previous_best:\n", + " return True\n", + " return False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing and Evaluating the Customized Model" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_breast_cancer\n", + "from sklearn.model_selection import train_test_split as tts\n", + "\n", + "dummy_data = load_breast_cancer()\n", + "X, y = dummy_data.data, dummy_data.target\n", + "\n", + "X_train, X_test, y_train, 
y_test = tts(X, y, test_size=0.2, random_state=103)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticClassifier(learning_rate = 0.01,\n", + " tolerance=1e-5,\n", + " max_iter=2000,\n", + " early_stopping=3,\n", + " validation_set=(X_test, y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# defining the metrics function\n", + "def accuracy(predictions, actual):\n", + " return sum(predictions == actual) / len(actual)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9736842105263158" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy(y_pred, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 636d0024ef34e3221ba9643f06f9846c5d173d91 Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:21:52 +0100 Subject: [PATCH 3/9] Delete empty.gitkeep --- Kolawole_Steven/empty.gitkeep | 1 - 1 file changed, 1 deletion(-) delete mode 100644 
Kolawole_Steven/empty.gitkeep diff --git a/Kolawole_Steven/empty.gitkeep b/Kolawole_Steven/empty.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/Kolawole_Steven/empty.gitkeep +++ /dev/null @@ -1 +0,0 @@ - From 9fddf40e8b8d3c692f7afb2d9b8f1a6430b391f0 Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:33:55 +0100 Subject: [PATCH 4/9] Delete Logistic Regression from Scratch.ipynb --- .../Logistic Regression from Scratch.ipynb | 322 ------------------ 1 file changed, 322 deletions(-) delete mode 100644 Kolawole_Steven/Logistic Regression from Scratch.ipynb diff --git a/Kolawole_Steven/Logistic Regression from Scratch.ipynb b/Kolawole_Steven/Logistic Regression from Scratch.ipynb deleted file mode 100644 index 6031d92..0000000 --- a/Kolawole_Steven/Logistic Regression from Scratch.ipynb +++ /dev/null @@ -1,322 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Logistic Regression from Scratch\n", - "## - Steven Kolawole " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Intro\n", - "\n", - "Logistic Regression is simply a Linear Regression with a Sigmoid function at its end.\n", - "\n", - "The Sigmoid function generates probability (i.e. outputs between 0 and 1) for all values of X." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "def sigmoid_fxn(x):\n", - " yhat = list(map(lambda i: 1 / (1 + np.exp(-i)), x))\n", - " return yhat\n", - "\n", - "x = np.arange(-10., 10., 0.2)\n", - "logit = sigmoid_fxn(x)\n", - "plt.title(\"The Sigmoid Function Curve\", fontsize=15)\n", - "\n", - "plt.plot(x, logit)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Building my Customized Logistic Regression" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "class LogisticClassifier():\n", - " def __init__(self, \n", - " learning_rate=0.1, \n", - " tolerance=1e-4, \n", - " max_iter=1000, \n", - " batch_size=32, \n", - " momentum_decay=0.9, \n", - " early_stopping=3, \n", - " validation_set=(None,None)):\n", - " \n", - " # Gradient descent parameters\n", - " self.learning_rate = float(learning_rate)\n", - " self.tolerance = float(tolerance)\n", - " self.max_iter = int(max_iter)\n", - " self.batch_size=32\n", - " self.momentum_decay = float(momentum_decay)\n", - " self.early_stopping = int(early_stopping)\n", - " self.X_validation, self.y_validation = validation_set\n", - " \n", - " # to construct the design matrix\n", - " self.add_intercept = True\n", - " self.center = True \n", - " self.scale = True\n", - " \n", - " self.training_loss_history = []\n", - " \n", - " def __sigmoid(self, X):\n", - " return 1 / (1 + np.exp(-X))\n", - " \n", - " # z-score normalization and intercept addition\n", - " def __design_matrix(self, X):\n", - " if self.center:\n", - " X = X - self.means\n", - " if self.scale:\n", - " X = X / self.standard_error\n", - " if self.add_intercept:\n", - " intercept = np.ones((X.shape[0], 1))\n", - " X = np.hstack([intercept, X])\n", - " \n", - " return X\n", - " \n", - " def __fit_center_scale(self, X):\n", - " self.means = X.mean(axis=0)\n", - " self.standard_error = 
np.std(X, axis=0)\n", - " \n", - " def fit(self, X, y):\n", - " self.__fit_center_scale(X)\n", - "\n", - " n, k = X.shape\n", - " \n", - " # add intercept column to the design matrix\n", - " X = self.__design_matrix(X)\n", - "\n", - " # used for the convergence check\n", - " previous_loss = -float('inf')\n", - " self.converged = False\n", - " self.stopped_early = False\n", - " \n", - " # initialize parameters\n", - " self.beta = np.zeros(k + (1 if self.add_intercept else 0))\n", - " momentum = self.beta * 0 # to get the same shape and dtype as beta\n", - " \n", - " for i in range(self.max_iter):\n", - " shuffle = np.random.permutation(len(y))\n", - " X = X[shuffle, :]\n", - " y = y[shuffle]\n", - " \n", - " # we'll add one more batch, incase the batch size doesn't divide n evenly\n", - " extra = (1 if n % self.batch_size else 0)\n", - "\n", - " for batch_index in range(n // self.batch_size + extra):\n", - " batch_slice = slice(\n", - " self.batch_size * batch_index, \n", - " self.batch_size * (batch_index + 1) )\n", - " X_batch = X[batch_slice, :]\n", - " y_batch = y[batch_slice]\n", - " \n", - " beta_ahead = self.beta + self.momentum_decay * momentum\n", - " y_hat = self.__sigmoid(np.dot(X_batch, self.beta))\n", - " \n", - " # gradient descent\n", - " residuals = (y_hat - y_batch).reshape( (X_batch.shape[0], 1) )\n", - " gradient = (X_batch * residuals).mean(axis=0)\n", - " momentum = self.momentum_decay * momentum - self.learning_rate * gradient\n", - " self.beta += momentum\n", - "\n", - " # with minibatch, we only check convergence at the end of every epoch. 
\n", - " y_hat = self.__sigmoid(np.dot(X, self.beta))\n", - " self.loss = np.mean(-y * np.log(y_hat) - (1-y) * np.log(1-y_hat))\n", - " self.training_loss_history.append(self.loss)\n", - " \n", - " # early stopping\n", - " if self.check_validation_loss():\n", - " self.stopped_early = True\n", - " break \n", - " \n", - " if abs(previous_loss - self.loss) < self.tolerance:\n", - " self.converged = True\n", - " break\n", - " else:\n", - " previous_loss = self.loss\n", - " \n", - " self.iterations = i+1\n", - " \n", - " def predict_proba(self, X):\n", - " # add intercept column to the design matrix\n", - " X = self.__design_matrix(X)\n", - " return self.__sigmoid(np.dot(X, self.beta))\n", - "\n", - " def predict(self, X):\n", - " predictions = self.predict_proba(X).round()\n", - " return predictions\n", - "\n", - " def check_validation_loss(self):\n", - " # validation set loss\n", - " if not hasattr(self, 'validation_loss_history'):\n", - " self.validation_loss_history = []\n", - " p_hat = self.predict_proba(self.X_validation)\n", - " loss = np.mean(-self.y_validation * np.log(p_hat) - \\\n", - " (1-self.y_validation) * np.log(1-p_hat))\n", - " self.validation_loss_history.append(loss)\n", - " \n", - " t = self.early_stopping\n", - " if t and len(self.validation_loss_history) > t * 2:\n", - " recent_best = min(self.validation_loss_history[-t:])\n", - " previous_best = min(self.validation_loss_history[:-t])\n", - " if recent_best > previous_best:\n", - " return True\n", - " return False" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Testing and Evaluating the Customized Model" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.datasets import load_breast_cancer\n", - "from sklearn.model_selection import train_test_split as tts\n", - "\n", - "dummy_data = load_breast_cancer()\n", - "X, y = dummy_data.data, dummy_data.target\n", - "\n", - "X_train, X_test, y_train, 
y_test = tts(X, y, test_size=0.2, random_state=103)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "model = LogisticClassifier(learning_rate = 0.01,\n", - " tolerance=1e-5,\n", - " max_iter=2000,\n", - " early_stopping=3,\n", - " validation_set=(X_test, y_test))" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "model.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "y_pred = model.predict(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "# defining the metrics function\n", - "def accuracy(predictions, actual):\n", - " return sum(predictions == actual) / len(actual)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9736842105263158" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy(y_pred, y_test)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 4a6e86bbad037966916a8452ad2a9abd8058c84d Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:35:34 +0100 Subject: [PATCH 5/9] Add files via upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A 
Customized Logistic Regression with Nesterov Accelerated Gradient and an Early Stopping option: Nesterov Accelerated Gradient is theorized to converge at a rate at least 10 times faster than Stochastic Gradient Descent, and over 25 times faster than naive batch gradient descent. Nesterov Gradient combines the properties of Stochastic Gradient Descent - which supposedly has properties that allow it to “jump” out of shallow local minima, giving it a better chance of finding a true global minimum - with a 'smarter' momentum that has a somewhat prescient notion of the global minimum and knows to slow down before the hill slopes up again. But there is a catch: converging too fast makes it easier for the model to overfit, which runs into the well-known bias-variance tradeoff. My way of avoiding that is to introduce Early Stopping, which works by simply waiting for a certain number of epochs with no improvement in validation loss and then stopping the iterations. --- Logistic Regression from Scratch.ipynb | 322 +++++++++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100644 Logistic Regression from Scratch.ipynb diff --git a/Logistic Regression from Scratch.ipynb b/Logistic Regression from Scratch.ipynb new file mode 100644 index 0000000..6031d92 --- /dev/null +++ b/Logistic Regression from Scratch.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logistic Regression from Scratch\n", + "## - Steven Kolawole " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Intro\n", + "\n", + "Logistic Regression is simply a Linear Regression with a Sigmoid function at its end.\n", + "\n", + "The Sigmoid function generates probability (i.e. outputs between 0 and 1) for all values of X." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "def sigmoid_fxn(x):\n", + " yhat = list(map(lambda i: 1 / (1 + np.exp(-i)), x))\n", + " return yhat\n", + "\n", + "x = np.arange(-10., 10., 0.2)\n", + "logit = sigmoid_fxn(x)\n", + "plt.title(\"The Sigmoid Function Curve\", fontsize=15)\n", + "\n", + "plt.plot(x, logit)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building my Customized Logistic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "class LogisticClassifier():\n", + " def __init__(self, \n", + " learning_rate=0.1, \n", + " tolerance=1e-4, \n", + " max_iter=1000, \n", + " batch_size=32, \n", + " momentum_decay=0.9, \n", + " early_stopping=3, \n", + " validation_set=(None,None)):\n", + " \n", + " # Gradient descent parameters\n", + " self.learning_rate = float(learning_rate)\n", + " self.tolerance = float(tolerance)\n", + " self.max_iter = int(max_iter)\n", + " self.batch_size=32\n", + " self.momentum_decay = float(momentum_decay)\n", + " self.early_stopping = int(early_stopping)\n", + " self.X_validation, self.y_validation = validation_set\n", + " \n", + " # to construct the design matrix\n", + " self.add_intercept = True\n", + " self.center = True \n", + " self.scale = True\n", + " \n", + " self.training_loss_history = []\n", + " \n", + " def __sigmoid(self, X):\n", + " return 1 / (1 + np.exp(-X))\n", + " \n", + " # z-score normalization and intercept addition\n", + " def __design_matrix(self, X):\n", + " if self.center:\n", + " X = X - self.means\n", + " if self.scale:\n", + " X = X / self.standard_error\n", + " if self.add_intercept:\n", + " intercept = np.ones((X.shape[0], 1))\n", + " X = np.hstack([intercept, X])\n", + " \n", + " return X\n", + " \n", + " def __fit_center_scale(self, X):\n", + " self.means = X.mean(axis=0)\n", + " self.standard_error = 
np.std(X, axis=0)\n", + " \n", + " def fit(self, X, y):\n", + " self.__fit_center_scale(X)\n", + "\n", + " n, k = X.shape\n", + " \n", + " # add intercept column to the design matrix\n", + " X = self.__design_matrix(X)\n", + "\n", + " # used for the convergence check\n", + " previous_loss = -float('inf')\n", + " self.converged = False\n", + " self.stopped_early = False\n", + " \n", + " # initialize parameters\n", + " self.beta = np.zeros(k + (1 if self.add_intercept else 0))\n", + " momentum = self.beta * 0 # to get the same shape and dtype as beta\n", + " \n", + " for i in range(self.max_iter):\n", + " shuffle = np.random.permutation(len(y))\n", + " X = X[shuffle, :]\n", + " y = y[shuffle]\n", + " \n", + " # we'll add one more batch, incase the batch size doesn't divide n evenly\n", + " extra = (1 if n % self.batch_size else 0)\n", + "\n", + " for batch_index in range(n // self.batch_size + extra):\n", + " batch_slice = slice(\n", + " self.batch_size * batch_index, \n", + " self.batch_size * (batch_index + 1) )\n", + " X_batch = X[batch_slice, :]\n", + " y_batch = y[batch_slice]\n", + " \n", + " beta_ahead = self.beta + self.momentum_decay * momentum\n", + " y_hat = self.__sigmoid(np.dot(X_batch, self.beta))\n", + " \n", + " # gradient descent\n", + " residuals = (y_hat - y_batch).reshape( (X_batch.shape[0], 1) )\n", + " gradient = (X_batch * residuals).mean(axis=0)\n", + " momentum = self.momentum_decay * momentum - self.learning_rate * gradient\n", + " self.beta += momentum\n", + "\n", + " # with minibatch, we only check convergence at the end of every epoch. 
\n", + " y_hat = self.__sigmoid(np.dot(X, self.beta))\n", + " self.loss = np.mean(-y * np.log(y_hat) - (1-y) * np.log(1-y_hat))\n", + " self.training_loss_history.append(self.loss)\n", + " \n", + " # early stopping\n", + " if self.check_validation_loss():\n", + " self.stopped_early = True\n", + " break \n", + " \n", + " if abs(previous_loss - self.loss) < self.tolerance:\n", + " self.converged = True\n", + " break\n", + " else:\n", + " previous_loss = self.loss\n", + " \n", + " self.iterations = i+1\n", + " \n", + " def predict_proba(self, X):\n", + " # add intercept column to the design matrix\n", + " X = self.__design_matrix(X)\n", + " return self.__sigmoid(np.dot(X, self.beta))\n", + "\n", + " def predict(self, X):\n", + " predictions = self.predict_proba(X).round()\n", + " return predictions\n", + "\n", + " def check_validation_loss(self):\n", + " # validation set loss\n", + " if not hasattr(self, 'validation_loss_history'):\n", + " self.validation_loss_history = []\n", + " p_hat = self.predict_proba(self.X_validation)\n", + " loss = np.mean(-self.y_validation * np.log(p_hat) - \\\n", + " (1-self.y_validation) * np.log(1-p_hat))\n", + " self.validation_loss_history.append(loss)\n", + " \n", + " t = self.early_stopping\n", + " if t and len(self.validation_loss_history) > t * 2:\n", + " recent_best = min(self.validation_loss_history[-t:])\n", + " previous_best = min(self.validation_loss_history[:-t])\n", + " if recent_best > previous_best:\n", + " return True\n", + " return False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing and Evaluating the Customized Model" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_breast_cancer\n", + "from sklearn.model_selection import train_test_split as tts\n", + "\n", + "dummy_data = load_breast_cancer()\n", + "X, y = dummy_data.data, dummy_data.target\n", + "\n", + "X_train, X_test, y_train, 
y_test = tts(X, y, test_size=0.2, random_state=103)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticClassifier(learning_rate = 0.01,\n", + " tolerance=1e-5,\n", + " max_iter=2000,\n", + " early_stopping=3,\n", + " validation_set=(X_test, y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# defining the metrics function\n", + "def accuracy(predictions, actual):\n", + " return sum(predictions == actual) / len(actual)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9736842105263158" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy(y_pred, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From f9b9081bc03172e6f6e11a5f585a81c64e367dd9 Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:36:31 +0100 Subject: [PATCH 6/9] Delete Logistic Regression from Scratch.ipynb --- Logistic Regression from Scratch.ipynb | 322 
------------------------- 1 file changed, 322 deletions(-) delete mode 100644 Logistic Regression from Scratch.ipynb diff --git a/Logistic Regression from Scratch.ipynb b/Logistic Regression from Scratch.ipynb deleted file mode 100644 index 6031d92..0000000 --- a/Logistic Regression from Scratch.ipynb +++ /dev/null @@ -1,322 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Logistic Regression from Scratch\n", - "## - Steven Kolawole " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Intro\n", - "\n", - "Logistic Regression is simply a Linear Regression with a Sigmoid function at its end.\n", - "\n", - "The Sigmoid function generates probability (i.e. outputs between 0 and 1) for all values of X." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "def sigmoid_fxn(x):\n", - " yhat = list(map(lambda i: 1 / (1 + np.exp(-i)), x))\n", - " return yhat\n", - "\n", - "x = np.arange(-10., 10., 0.2)\n", - "logit = sigmoid_fxn(x)\n", - "plt.title(\"The Sigmoid Function Curve\", fontsize=15)\n", - "\n", - "plt.plot(x, logit)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Building my Customized Logistic Regression" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "class LogisticClassifier():\n", - " def __init__(self, \n", - " learning_rate=0.1, \n", - " tolerance=1e-4, \n", - " max_iter=1000, \n", - " batch_size=32, \n", - " momentum_decay=0.9, \n", - " early_stopping=3, \n", - " validation_set=(None,None)):\n", - " \n", - " # Gradient descent parameters\n", - " self.learning_rate = float(learning_rate)\n", - " self.tolerance = float(tolerance)\n", - " self.max_iter = int(max_iter)\n", - " self.batch_size=32\n", - " self.momentum_decay = float(momentum_decay)\n", - " self.early_stopping = int(early_stopping)\n", - " self.X_validation, self.y_validation = validation_set\n", - " \n", - " # to construct the design matrix\n", - " self.add_intercept = True\n", - " self.center = True \n", - " self.scale = True\n", - " \n", - " self.training_loss_history = []\n", - " \n", - " def __sigmoid(self, X):\n", - " return 1 / (1 + np.exp(-X))\n", - " \n", - " # z-score normalization and intercept addition\n", - " def __design_matrix(self, X):\n", - " if self.center:\n", - " X = X - self.means\n", - " if self.scale:\n", - " X = X / self.standard_error\n", - " if self.add_intercept:\n", - " intercept = np.ones((X.shape[0], 1))\n", - " X = np.hstack([intercept, X])\n", - " \n", - " return X\n", - " \n", - " def __fit_center_scale(self, X):\n", - " self.means = X.mean(axis=0)\n", - " self.standard_error = 
np.std(X, axis=0)\n", - " \n", - " def fit(self, X, y):\n", - " self.__fit_center_scale(X)\n", - "\n", - " n, k = X.shape\n", - " \n", - " # add intercept column to the design matrix\n", - " X = self.__design_matrix(X)\n", - "\n", - " # used for the convergence check\n", - " previous_loss = -float('inf')\n", - " self.converged = False\n", - " self.stopped_early = False\n", - " \n", - " # initialize parameters\n", - " self.beta = np.zeros(k + (1 if self.add_intercept else 0))\n", - " momentum = self.beta * 0 # to get the same shape and dtype as beta\n", - " \n", - " for i in range(self.max_iter):\n", - " shuffle = np.random.permutation(len(y))\n", - " X = X[shuffle, :]\n", - " y = y[shuffle]\n", - " \n", - " # we'll add one more batch, incase the batch size doesn't divide n evenly\n", - " extra = (1 if n % self.batch_size else 0)\n", - "\n", - " for batch_index in range(n // self.batch_size + extra):\n", - " batch_slice = slice(\n", - " self.batch_size * batch_index, \n", - " self.batch_size * (batch_index + 1) )\n", - " X_batch = X[batch_slice, :]\n", - " y_batch = y[batch_slice]\n", - " \n", - " beta_ahead = self.beta + self.momentum_decay * momentum\n", - " y_hat = self.__sigmoid(np.dot(X_batch, self.beta))\n", - " \n", - " # gradient descent\n", - " residuals = (y_hat - y_batch).reshape( (X_batch.shape[0], 1) )\n", - " gradient = (X_batch * residuals).mean(axis=0)\n", - " momentum = self.momentum_decay * momentum - self.learning_rate * gradient\n", - " self.beta += momentum\n", - "\n", - " # with minibatch, we only check convergence at the end of every epoch. 
\n", - " y_hat = self.__sigmoid(np.dot(X, self.beta))\n", - " self.loss = np.mean(-y * np.log(y_hat) - (1-y) * np.log(1-y_hat))\n", - " self.training_loss_history.append(self.loss)\n", - " \n", - " # early stopping\n", - " if self.check_validation_loss():\n", - " self.stopped_early = True\n", - " break \n", - " \n", - " if abs(previous_loss - self.loss) < self.tolerance:\n", - " self.converged = True\n", - " break\n", - " else:\n", - " previous_loss = self.loss\n", - " \n", - " self.iterations = i+1\n", - " \n", - " def predict_proba(self, X):\n", - " # add intercept column to the design matrix\n", - " X = self.__design_matrix(X)\n", - " return self.__sigmoid(np.dot(X, self.beta))\n", - "\n", - " def predict(self, X):\n", - " predictions = self.predict_proba(X).round()\n", - " return predictions\n", - "\n", - " def check_validation_loss(self):\n", - " # validation set loss\n", - " if not hasattr(self, 'validation_loss_history'):\n", - " self.validation_loss_history = []\n", - " p_hat = self.predict_proba(self.X_validation)\n", - " loss = np.mean(-self.y_validation * np.log(p_hat) - \\\n", - " (1-self.y_validation) * np.log(1-p_hat))\n", - " self.validation_loss_history.append(loss)\n", - " \n", - " t = self.early_stopping\n", - " if t and len(self.validation_loss_history) > t * 2:\n", - " recent_best = min(self.validation_loss_history[-t:])\n", - " previous_best = min(self.validation_loss_history[:-t])\n", - " if recent_best > previous_best:\n", - " return True\n", - " return False" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Testing and Evaluating the Customized Model" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.datasets import load_breast_cancer\n", - "from sklearn.model_selection import train_test_split as tts\n", - "\n", - "dummy_data = load_breast_cancer()\n", - "X, y = dummy_data.data, dummy_data.target\n", - "\n", - "X_train, X_test, y_train, 
y_test = tts(X, y, test_size=0.2, random_state=103)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "model = LogisticClassifier(learning_rate = 0.01,\n", - " tolerance=1e-5,\n", - " max_iter=2000,\n", - " early_stopping=3,\n", - " validation_set=(X_test, y_test))" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "model.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "y_pred = model.predict(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "# defining the metrics function\n", - "def accuracy(predictions, actual):\n", - " return sum(predictions == actual) / len(actual)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9736842105263158" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy(y_pred, y_test)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 8626fe6a9801840f33c1f03bd182fd245d728ac8 Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:37:43 +0100 Subject: [PATCH 7/9] Create readme.md --- Kolawole_Steven/readme.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 
Kolawole_Steven/readme.md diff --git a/Kolawole_Steven/readme.md b/Kolawole_Steven/readme.md new file mode 100644 index 0000000..cd782a5 --- /dev/null +++ b/Kolawole_Steven/readme.md @@ -0,0 +1,10 @@ +A Customized Logistic Regression with Nesterov Accelerated Gradient and an Early Stopping option: + + +Nesterov Accelerated Gradient is theorized to converge at least 10 times faster than Stochastic Gradient Descent, and over 25 times faster than naive batch gradient descent. + +Nesterov Gradient combines the properties of Stochastic Gradient Descent -which supposedly have the properties that allows it to “jump” out of shallow local minima giving it a better chance of finding a true global minimum- with a 'smarter' momentum, that has a somewhat prescient notion of the global minimum, and knows to slow down before the hill slopes up again. + +But there is a catch; +Converging too fast makes it easier for the model to overfit, causing the well-known bias-variance tradeoff. +My way of avoiding that is to introduce Early Stopping, which works by simply waiting for a certain number of epochs with no improvement in validation loss. 
From ab9a6da8a2d028cce57906fb34efb6c7ae734340 Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:38:33 +0100 Subject: [PATCH 8/9] Add notebook via upload --- .../Logistic Regression from Scratch.ipynb | 322 ++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100644 Kolawole_Steven/Logistic Regression from Scratch.ipynb diff --git a/Kolawole_Steven/Logistic Regression from Scratch.ipynb b/Kolawole_Steven/Logistic Regression from Scratch.ipynb new file mode 100644 index 0000000..6031d92 --- /dev/null +++ b/Kolawole_Steven/Logistic Regression from Scratch.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logistic Regression from Scratch\n", + "## - Steven Kolawole " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Intro\n", + "\n", + "Logistic Regression is simply a Linear Regression with a Sigmoid function at its end.\n", + "\n", + "The Sigmoid function generates probability (i.e. outputs between 0 and 1) for all values of X." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "def sigmoid_fxn(x):\n", + " yhat = list(map(lambda i: 1 / (1 + np.exp(-i)), x))\n", + " return yhat\n", + "\n", + "x = np.arange(-10., 10., 0.2)\n", + "logit = sigmoid_fxn(x)\n", + "plt.title(\"The Sigmoid Function Curve\", fontsize=15)\n", + "\n", + "plt.plot(x, logit)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building my Customized Logistic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "class LogisticClassifier():\n", + " def __init__(self, \n", + " learning_rate=0.1, \n", + " tolerance=1e-4, \n", + " max_iter=1000, \n", + " batch_size=32, \n", + " momentum_decay=0.9, \n", + " early_stopping=3, \n", + " validation_set=(None,None)):\n", + " \n", + " # Gradient descent parameters\n", + " self.learning_rate = float(learning_rate)\n", + " self.tolerance = float(tolerance)\n", + " self.max_iter = int(max_iter)\n", + " self.batch_size=32\n", + " self.momentum_decay = float(momentum_decay)\n", + " self.early_stopping = int(early_stopping)\n", + " self.X_validation, self.y_validation = validation_set\n", + " \n", + " # to construct the design matrix\n", + " self.add_intercept = True\n", + " self.center = True \n", + " self.scale = True\n", + " \n", + " self.training_loss_history = []\n", + " \n", + " def __sigmoid(self, X):\n", + " return 1 / (1 + np.exp(-X))\n", + " \n", + " # z-score normalization and intercept addition\n", + " def __design_matrix(self, X):\n", + " if self.center:\n", + " X = X - self.means\n", + " if self.scale:\n", + " X = X / self.standard_error\n", + " if self.add_intercept:\n", + " intercept = np.ones((X.shape[0], 1))\n", + " X = np.hstack([intercept, X])\n", + " \n", + " return X\n", + " \n", + " def __fit_center_scale(self, X):\n", + " self.means = X.mean(axis=0)\n", + " self.standard_error = 
np.std(X, axis=0)\n", + " \n", + " def fit(self, X, y):\n", + " self.__fit_center_scale(X)\n", + "\n", + " n, k = X.shape\n", + " \n", + " # add intercept column to the design matrix\n", + " X = self.__design_matrix(X)\n", + "\n", + " # used for the convergence check\n", + " previous_loss = -float('inf')\n", + " self.converged = False\n", + " self.stopped_early = False\n", + " \n", + " # initialize parameters\n", + " self.beta = np.zeros(k + (1 if self.add_intercept else 0))\n", + " momentum = self.beta * 0 # to get the same shape and dtype as beta\n", + " \n", + " for i in range(self.max_iter):\n", + " shuffle = np.random.permutation(len(y))\n", + " X = X[shuffle, :]\n", + " y = y[shuffle]\n", + " \n", + " # we'll add one more batch, incase the batch size doesn't divide n evenly\n", + " extra = (1 if n % self.batch_size else 0)\n", + "\n", + " for batch_index in range(n // self.batch_size + extra):\n", + " batch_slice = slice(\n", + " self.batch_size * batch_index, \n", + " self.batch_size * (batch_index + 1) )\n", + " X_batch = X[batch_slice, :]\n", + " y_batch = y[batch_slice]\n", + " \n", + " beta_ahead = self.beta + self.momentum_decay * momentum\n", + " y_hat = self.__sigmoid(np.dot(X_batch, self.beta))\n", + " \n", + " # gradient descent\n", + " residuals = (y_hat - y_batch).reshape( (X_batch.shape[0], 1) )\n", + " gradient = (X_batch * residuals).mean(axis=0)\n", + " momentum = self.momentum_decay * momentum - self.learning_rate * gradient\n", + " self.beta += momentum\n", + "\n", + " # with minibatch, we only check convergence at the end of every epoch. 
\n", + " y_hat = self.__sigmoid(np.dot(X, self.beta))\n", + " self.loss = np.mean(-y * np.log(y_hat) - (1-y) * np.log(1-y_hat))\n", + " self.training_loss_history.append(self.loss)\n", + " \n", + " # early stopping\n", + " if self.check_validation_loss():\n", + " self.stopped_early = True\n", + " break \n", + " \n", + " if abs(previous_loss - self.loss) < self.tolerance:\n", + " self.converged = True\n", + " break\n", + " else:\n", + " previous_loss = self.loss\n", + " \n", + " self.iterations = i+1\n", + " \n", + " def predict_proba(self, X):\n", + " # add intercept column to the design matrix\n", + " X = self.__design_matrix(X)\n", + " return self.__sigmoid(np.dot(X, self.beta))\n", + "\n", + " def predict(self, X):\n", + " predictions = self.predict_proba(X).round()\n", + " return predictions\n", + "\n", + " def check_validation_loss(self):\n", + " # validation set loss\n", + " if not hasattr(self, 'validation_loss_history'):\n", + " self.validation_loss_history = []\n", + " p_hat = self.predict_proba(self.X_validation)\n", + " loss = np.mean(-self.y_validation * np.log(p_hat) - \\\n", + " (1-self.y_validation) * np.log(1-p_hat))\n", + " self.validation_loss_history.append(loss)\n", + " \n", + " t = self.early_stopping\n", + " if t and len(self.validation_loss_history) > t * 2:\n", + " recent_best = min(self.validation_loss_history[-t:])\n", + " previous_best = min(self.validation_loss_history[:-t])\n", + " if recent_best > previous_best:\n", + " return True\n", + " return False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing and Evaluating the Customized Model" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_breast_cancer\n", + "from sklearn.model_selection import train_test_split as tts\n", + "\n", + "dummy_data = load_breast_cancer()\n", + "X, y = dummy_data.data, dummy_data.target\n", + "\n", + "X_train, X_test, y_train, 
y_test = tts(X, y, test_size=0.2, random_state=103)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticClassifier(learning_rate = 0.01,\n", + " tolerance=1e-5,\n", + " max_iter=2000,\n", + " early_stopping=3,\n", + " validation_set=(X_test, y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# defining the metrics function\n", + "def accuracy(predictions, actual):\n", + " return sum(predictions == actual) / len(actual)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9736842105263158" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy(y_pred, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 1812f1172d93fc9109ab9887a987c979d189611c Mon Sep 17 00:00:00 2001 From: Steven Kolawole <45284829+SteveKola@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:40:53 +0100 Subject: [PATCH 9/9] Update readme.md --- Kolawole_Steven/readme.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/Kolawole_Steven/readme.md b/Kolawole_Steven/readme.md index cd782a5..daff5c8 100644 --- a/Kolawole_Steven/readme.md +++ b/Kolawole_Steven/readme.md @@ -6,5 +6,7 @@ Nesterov Accelerated Gradient is theorized to converge by at least, a 10 times f Nesterov Gradient combines the properties of Stochastic Gradient Descent -which supposedly have the properties that allows it to “jump” out of shallow local minima giving it a better chance of finding a true global minimum- with a 'smarter' momentum, that has a somewhat prescient notion of the global minimum, and knows to slow down before the hill slopes up again. But there is a catch; + Converging too fast makes it easier for the model to overfit, causing the well-known bias-variance tradeoff. -My way of avoiding that is to introduce Early Stopping, which works by simply waiting for a certain number of epochs with no improvement in validation loss. + +My way of avoiding that is to introduce Early Stopping, which works by simply terminating the iterations when there is no improvement in validation loss after a certain number of epochs.