From 02d5b70d81d879f974ac262ad893eb367142968b Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 01:50:48 +0100 Subject: [PATCH 01/11] Add files via upload --- ...cation and Multiclass Classification.ipynb | 255 ++++++++++++++++++ Readme.md | 41 +++ 2 files changed, 296 insertions(+) create mode 100644 Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb create mode 100644 Readme.md diff --git a/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb b/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb new file mode 100644 index 0000000..c8bc2e1 --- /dev/null +++ b/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb @@ -0,0 +1,255 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "class LogisticRegression:\n", + " def __init__(self,lr=0.1, n_iters=1000, multi=False):\n", + " self.lr=lr\n", + " self.n_iters = n_iters\n", + " self.weights = None\n", + " self.bias = None\n", + " self.multi = multi\n", + " self.w = []\n", + " self.b = []\n", + " \n", + " def fit(self,X,y):\n", + " n_samples,n_features = X.shape\n", + " if self.multi == True:\n", + " for i in np.unique(y):\n", + " self.weights = np.zeros(n_features)\n", + " self.bias = 0\n", + " for _ in range(self.n_iters):\n", + " y_ = [1 if j == i else 0 for j in y]\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " \n", + " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y_))#- (2 *1* self.weights)\n", + " db = (2/n_samples)*np.sum(y_predicted-y_)\n", + "\n", + " self.weights -=self.lr*dw\n", + " self.bias -=self.lr*db\n", + "\n", + " self.w.append(self.weights)\n", + " self.b.append(self.bias)\n", + " else:\n", + " self.weights = 
np.zeros(n_features)\n", + " self.bias = 0\n", + " for _ in range(self.n_iters):\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " #print(y_predicted)\n", + "\n", + " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y))#- (2 *1* self.weights)\n", + " db = (2/n_samples)*np.sum(y_predicted-y)\n", + "\n", + " self.weights -=self.lr*dw\n", + " self.bias -=self.lr*db\n", + "\n", + " def predict(self,X):\n", + " if self.multi == True:\n", + " predicts = []\n", + " for i in range(len(self.b)):\n", + " linear_model = np.dot(X,self.w[i])+self.b[i]\n", + " y_predicted = self._sigmoid(linear_model)\n", + " predicts.append(y_predicted)\n", + " predicts = np.array(predicts)\n", + " predicts = [predicts[:,i] for i in range(len(y_predicted))]\n", + " return self.softmax(predicts)\n", + " else:\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]\n", + " return y_predicted_cls\n", + " \n", + "\n", + " def _sigmoid(self,x):\n", + " return 1/(1+np.exp(-x))\n", + " \n", + " def softmax(self,x):\n", + " return np.exp(x)/ np.sum(np.exp(x),axis=1).reshape(-1,1)\n", + " \n", + " def accuracy(self,y_true, y_pred):\n", + " if self.multi == True:\n", + " y_pred = np.argmax(y_pred,axis=1)\n", + " accuracy = np.sum(y_true==y_pred)/len(y_true)\n", + " return accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Binary classification" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn import datasets\n", + "import pandas as pd\n", + "bc = datasets.load_breast_cancer()\n", + "X,y = bc.data, bc.target\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=1234)" + ] + }, + { + "cell_type": "code", + "execution_count": 
52, + "metadata": {}, + "outputs": [], + "source": [ + "model1 = LogisticRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" + ] + } + ], + "source": [ + "model1.fit(X_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" + ] + } + ], + "source": [ + "predictions = model1.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9210526315789473" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model1.accuracy(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiclass Classification" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn import datasets\n", + "import pandas as pd\n", + "bc = datasets.load_iris()\n", + "X,y = bc.data, bc.target\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.4, random_state=1234)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9833333333333333" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model1 = LogisticRegression(multi=True)\n", + 
"model1.fit(X_train,y_train)\n", + "predictions = model1.predict(X_test)\n", + "model1.accuracy(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..3cdca08 --- /dev/null +++ b/Readme.md @@ -0,0 +1,41 @@ +This is a logistic regression algorithm built from scratch with the use of the numpy library only. + +Logistic regression most times only have the capability for binary classification but this particular can also solve for multiclass classification using the OVR(One VS Rest Method.) + +SUMMARY OF THE LOGISTIC REGRESSION ALGORITHM +- Initialize weights with its column as the number of features present in dataset + +- Find the dot product of the weight and the data added with a particular bias which gives us our linear model. + (input_data * weights) + bias. + +- The sigmoid function is then applied to this result to turn it into a probability (between 0 and 1) for the predicted class. + sigmoid_function = 1 / (1 + exp(-x)) + +- This helps find the correlation between the input data and its weights, from which the error is calculated; the weights contributing to this error are penalized by calculating the rate of change of the error with respect to the weights and bias. 
+ dw = (1 / number of input_data)*(input_data * error) + db = (1 / number of input_data)*sum(error) + where, error = predictions - true_label + +- From here, weights are updated little by little by multiplying this rate of change with a particular fraction known as the learning rate and subtracting it from the previous weights and biases. + +- This goes for a couple of iterations defaultly set "n_iters = 1000". This procees is called the gradient descent algorithm. + +Multiclass Classification +- This is done by making binary classification on one of the classes against other classes repeatedly for each class. + +- Sigmoid probabilities of their linear-model are computed. + +- Then the sigmoid probabilities of these binary classifiers are extracted for each datapoint and passed through a softmax activation function. This rescales all values into probabilities between 0 and 1 that sum up to 1. + softmax_function = exp(xi) / sum(exp(xj)), where xi = sigmoid probability of class i for a particular input_data prediction and the sum runs over all classes j + +- Then the class with the final highest probability is chosen as the prediction. + + +For implementation: +model = LogisticRegression() #This is implements for binary classification and automatically sets the multi parameter to False. +model.fit(X,y) +model.predict(X) +model.accuracy(y, y_predict) + +------------------------------------------------------------------------------------------------------------------ +model = LogisticRegression(multi=True) #This is used for multiclass classification and "multi" is needed to be set "True". 
From ee16b6d416b04231205b1bc93d6592b606f82a53 Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 01:54:05 +0100 Subject: [PATCH 02/11] Update Readme.md --- Readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Readme.md b/Readme.md index 3cdca08..fae3041 100644 --- a/Readme.md +++ b/Readme.md @@ -38,4 +38,4 @@ model.predict(X) model.accuracy(y, y_predict) ------------------------------------------------------------------------------------------------------------------ -model = LogisticRegression(multi=True) #This is used for multiclass classification and "multi" is needed to be set "True". +model = LogisticRegression(multi=True) #This is used for multiclass classification and "multi" is needed to be set "True". From 7a99201cd82e5e07f6b2a05d4ef597a33e0a5bea Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:01:48 +0100 Subject: [PATCH 03/11] Rename Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb to Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb --- ...stic Regression for Binary and Multiclass Classification.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb => Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb (100%) diff --git a/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb b/Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb similarity index 100% rename from Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb rename to Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb From 514c39adac01d87e13ecf86f7ddbb10bb148ae97 Mon Sep 17 00:00:00 2001 From: 
Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:02:31 +0100 Subject: [PATCH 04/11] Rename Readme.md to Oke_Matthew_Olawale/Readme.md --- Readme.md => Oke_Matthew_Olawale/Readme.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Readme.md => Oke_Matthew_Olawale/Readme.md (100%) diff --git a/Readme.md b/Oke_Matthew_Olawale/Readme.md similarity index 100% rename from Readme.md rename to Oke_Matthew_Olawale/Readme.md From 1d45efbd8ee988d54dcc4e3c6a1b30fffc7a2932 Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:04:12 +0100 Subject: [PATCH 05/11] Update Readme.md --- Oke_Matthew_Olawale/Readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Oke_Matthew_Olawale/Readme.md b/Oke_Matthew_Olawale/Readme.md index fae3041..9c97953 100644 --- a/Oke_Matthew_Olawale/Readme.md +++ b/Oke_Matthew_Olawale/Readme.md @@ -2,7 +2,7 @@ This is a logistic regression algorithm built from scratch with the use of the n Logistic regression most times only have the capability for binary classification but this particular can also solve for multiclass classification using the OVR(One VS Rest Method.) -SUMMARY OF THE LOGISTIC REGRESSION ALGORITHM +THE LOGISTIC REGRESSION ALGORITHM - Initialize weights with its column as the number of features present in dataset - Find the dot product of the weight and the data added with a particular bias which gives us our linear model. 
From 87cdb2a9fd3f0bd94f9c2220569919aa5a3a072d Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:04:43 +0100 Subject: [PATCH 06/11] Update Readme.md --- Oke_Matthew_Olawale/Readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Oke_Matthew_Olawale/Readme.md b/Oke_Matthew_Olawale/Readme.md index 9c97953..fae3041 100644 --- a/Oke_Matthew_Olawale/Readme.md +++ b/Oke_Matthew_Olawale/Readme.md @@ -2,7 +2,7 @@ This is a logistic regression algorithm built from scratch with the use of the n Logistic regression most times only have the capability for binary classification but this particular can also solve for multiclass classification using the OVR(One VS Rest Method.) -THE LOGISTIC REGRESSION ALGORITHM +SUMMARY OF THE LOGISTIC REGRESSION ALGORITHM - Initialize weights with its column as the number of features present in dataset - Find the dot product of the weight and the data added with a particular bias which gives us our linear model. From 4e32643da59c5ca0ea7d9bdab48899b70da257e6 Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:05:45 +0100 Subject: [PATCH 07/11] Update Readme.md --- Oke_Matthew_Olawale/Readme.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Oke_Matthew_Olawale/Readme.md b/Oke_Matthew_Olawale/Readme.md index fae3041..82f69fa 100644 --- a/Oke_Matthew_Olawale/Readme.md +++ b/Oke_Matthew_Olawale/Readme.md @@ -20,6 +20,8 @@ SUMMARY OF THE LOGISTIC REGRESSION ALGORITHM - This goes for a couple of iterations defaultly set "n_iters = 1000". This procees is called the gradient descent algorithm. + + Multiclass Classification - This is done by making binary classification on one of the classes against other classes repeatedly for each class. 
@@ -32,10 +34,10 @@ Multiclass Classification For implementation: -model = LogisticRegression() #This is implements for binary classification and automatically sets the multi parameter to False. +model = LogisticRegression() #This is implements for binary classification and automatically sets the multi parameter to False. model.fit(X,y) model.predict(X) model.accuracy(y, y_predict) ------------------------------------------------------------------------------------------------------------------ -model = LogisticRegression(multi=True) #This is used for multiclass classification and "multi" is needed to be set "True". +model = LogisticRegression(multi=True) #This is used for multiclass classification and "multi" is needed to be set "True". From 4e664545cc1e792bcce31bd2a567b4065ef35d81 Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:29:40 +0100 Subject: [PATCH 08/11] Delete Logistic Regression for Binary and Multiclass Classification.ipynb --- ...Binary and Multiclass Classification.ipynb | 255 ------------------ 1 file changed, 255 deletions(-) delete mode 100644 Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb diff --git a/Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb b/Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb deleted file mode 100644 index c8bc2e1..0000000 --- a/Oke_Matthew_Olawale/Logistic Regression for Binary and Multiclass Classification.ipynb +++ /dev/null @@ -1,255 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class LogisticRegression:\n", - " def __init__(self,lr=0.1, n_iters=1000, multi=False):\n", - " self.lr=lr\n", - " self.n_iters = n_iters\n", - " self.weights = None\n", - " self.bias = None\n", - " self.multi = multi\n", - " self.w = []\n", - " self.b = 
[]\n", - " \n", - " def fit(self,X,y):\n", - " n_samples,n_features = X.shape\n", - " if self.multi == True:\n", - " for i in np.unique(y):\n", - " self.weights = np.zeros(n_features)\n", - " self.bias = 0\n", - " for _ in range(self.n_iters):\n", - " y_ = [1 if j == i else 0 for j in y]\n", - " linear_model = np.dot(X,self.weights)+self.bias\n", - " y_predicted = self._sigmoid(linear_model)\n", - " \n", - " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y_))#- (2 *1* self.weights)\n", - " db = (2/n_samples)*np.sum(y_predicted-y_)\n", - "\n", - " self.weights -=self.lr*dw\n", - " self.bias -=self.lr*db\n", - "\n", - " self.w.append(self.weights)\n", - " self.b.append(self.bias)\n", - " else:\n", - " self.weights = np.zeros(n_features)\n", - " self.bias = 0\n", - " for _ in range(self.n_iters):\n", - " linear_model = np.dot(X,self.weights)+self.bias\n", - " y_predicted = self._sigmoid(linear_model)\n", - " #print(y_predicted)\n", - "\n", - " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y))#- (2 *1* self.weights)\n", - " db = (2/n_samples)*np.sum(y_predicted-y)\n", - "\n", - " self.weights -=self.lr*dw\n", - " self.bias -=self.lr*db\n", - "\n", - " def predict(self,X):\n", - " if self.multi == True:\n", - " predicts = []\n", - " for i in range(len(self.b)):\n", - " linear_model = np.dot(X,self.w[i])+self.b[i]\n", - " y_predicted = self._sigmoid(linear_model)\n", - " predicts.append(y_predicted)\n", - " predicts = np.array(predicts)\n", - " predicts = [predicts[:,i] for i in range(len(y_predicted))]\n", - " return self.softmax(predicts)\n", - " else:\n", - " linear_model = np.dot(X,self.weights)+self.bias\n", - " y_predicted = self._sigmoid(linear_model)\n", - " y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]\n", - " return y_predicted_cls\n", - " \n", - "\n", - " def _sigmoid(self,x):\n", - " return 1/(1+np.exp(-x))\n", - " \n", - " def softmax(self,x):\n", - " return np.exp(x)/ np.sum(np.exp(x),axis=1).reshape(-1,1)\n", - " \n", - " def accuracy(self,y_true, 
y_pred):\n", - " if self.multi == True:\n", - " y_pred = np.argmax(y_pred,axis=1)\n", - " accuracy = np.sum(y_true==y_pred)/len(y_true)\n", - " return accuracy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Binary classification" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn import datasets\n", - "import pandas as pd\n", - "bc = datasets.load_breast_cancer()\n", - "X,y = bc.data, bc.target\n", - "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=1234)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "model1 = LogisticRegression()" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" - ] - } - ], - "source": [ - "model1.fit(X_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" - ] - } - ], - "source": [ - "predictions = model1.predict(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9210526315789473" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model1.accuracy(y_test, predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Multiclass 
Classification" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn import datasets\n", - "import pandas as pd\n", - "bc = datasets.load_iris()\n", - "X,y = bc.data, bc.target\n", - "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.4, random_state=1234)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9833333333333333" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model1 = LogisticRegression(multi=True)\n", - "model1.fit(X_train,y_train)\n", - "predictions = model1.predict(X_test)\n", - "model1.accuracy(y_test, predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 281dac3181a0c853f20185880a73b414b9a428d0 Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:30:09 +0100 Subject: [PATCH 09/11] Add files via upload --- ...cation and Multiclass Classification.ipynb | 255 ++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb diff --git a/Oke_Matthew_Olawale/Logistic Regression ALgorithm for 
Binary Classification and Multiclass Classification.ipynb b/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb new file mode 100644 index 0000000..c8bc2e1 --- /dev/null +++ b/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb @@ -0,0 +1,255 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "class LogisticRegression:\n", + " def __init__(self,lr=0.1, n_iters=1000, multi=False):\n", + " self.lr=lr\n", + " self.n_iters = n_iters\n", + " self.weights = None\n", + " self.bias = None\n", + " self.multi = multi\n", + " self.w = []\n", + " self.b = []\n", + " \n", + " def fit(self,X,y):\n", + " n_samples,n_features = X.shape\n", + " if self.multi == True:\n", + " for i in np.unique(y):\n", + " self.weights = np.zeros(n_features)\n", + " self.bias = 0\n", + " for _ in range(self.n_iters):\n", + " y_ = [1 if j == i else 0 for j in y]\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " \n", + " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y_))#- (2 *1* self.weights)\n", + " db = (2/n_samples)*np.sum(y_predicted-y_)\n", + "\n", + " self.weights -=self.lr*dw\n", + " self.bias -=self.lr*db\n", + "\n", + " self.w.append(self.weights)\n", + " self.b.append(self.bias)\n", + " else:\n", + " self.weights = np.zeros(n_features)\n", + " self.bias = 0\n", + " for _ in range(self.n_iters):\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " #print(y_predicted)\n", + "\n", + " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y))#- (2 *1* self.weights)\n", + " db = (2/n_samples)*np.sum(y_predicted-y)\n", + "\n", + " self.weights -=self.lr*dw\n", + " self.bias -=self.lr*db\n", + "\n", + " def predict(self,X):\n", + " if self.multi == True:\n", + " 
predicts = []\n", + " for i in range(len(self.b)):\n", + " linear_model = np.dot(X,self.w[i])+self.b[i]\n", + " y_predicted = self._sigmoid(linear_model)\n", + " predicts.append(y_predicted)\n", + " predicts = np.array(predicts)\n", + " predicts = [predicts[:,i] for i in range(len(y_predicted))]\n", + " return self.softmax(predicts)\n", + " else:\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]\n", + " return y_predicted_cls\n", + " \n", + "\n", + " def _sigmoid(self,x):\n", + " return 1/(1+np.exp(-x))\n", + " \n", + " def softmax(self,x):\n", + " return np.exp(x)/ np.sum(np.exp(x),axis=1).reshape(-1,1)\n", + " \n", + " def accuracy(self,y_true, y_pred):\n", + " if self.multi == True:\n", + " y_pred = np.argmax(y_pred,axis=1)\n", + " accuracy = np.sum(y_true==y_pred)/len(y_true)\n", + " return accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Binary classification" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn import datasets\n", + "import pandas as pd\n", + "bc = datasets.load_breast_cancer()\n", + "X,y = bc.data, bc.target\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=1234)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "model1 = LogisticRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" + ] + } + ], + "source": [ + "model1.fit(X_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + 
"metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" + ] + } + ], + "source": [ + "predictions = model1.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9210526315789473" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model1.accuracy(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiclass Classification" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn import datasets\n", + "import pandas as pd\n", + "bc = datasets.load_iris()\n", + "X,y = bc.data, bc.target\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.4, random_state=1234)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9833333333333333" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model1 = LogisticRegression(multi=True)\n", + "model1.fit(X_train,y_train)\n", + "predictions = model1.predict(X_test)\n", + "model1.accuracy(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From ad2c2cbe00c7c3071957022b0f86967c24442ed5 Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:31:19 +0100 Subject: [PATCH 10/11] Delete Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb --- ...cation and Multiclass Classification.ipynb | 255 ------------------ 1 file changed, 255 deletions(-) delete mode 100644 Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb diff --git a/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb b/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb deleted file mode 100644 index c8bc2e1..0000000 --- a/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb +++ /dev/null @@ -1,255 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class LogisticRegression:\n", - " def __init__(self,lr=0.1, n_iters=1000, multi=False):\n", - " self.lr=lr\n", - " self.n_iters = n_iters\n", - " self.weights = None\n", - " self.bias = None\n", - " self.multi = multi\n", - " self.w = []\n", - " self.b = []\n", - " \n", - " def fit(self,X,y):\n", - " n_samples,n_features = X.shape\n", - " if self.multi == True:\n", - " for i in np.unique(y):\n", - " self.weights = np.zeros(n_features)\n", - " self.bias = 0\n", - " for _ in range(self.n_iters):\n", - " y_ = [1 if j == i else 0 for j in y]\n", - " linear_model = 
np.dot(X,self.weights)+self.bias\n", - " y_predicted = self._sigmoid(linear_model)\n", - " \n", - " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y_))#- (2 *1* self.weights)\n", - " db = (2/n_samples)*np.sum(y_predicted-y_)\n", - "\n", - " self.weights -=self.lr*dw\n", - " self.bias -=self.lr*db\n", - "\n", - " self.w.append(self.weights)\n", - " self.b.append(self.bias)\n", - " else:\n", - " self.weights = np.zeros(n_features)\n", - " self.bias = 0\n", - " for _ in range(self.n_iters):\n", - " linear_model = np.dot(X,self.weights)+self.bias\n", - " y_predicted = self._sigmoid(linear_model)\n", - " #print(y_predicted)\n", - "\n", - " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y))#- (2 *1* self.weights)\n", - " db = (2/n_samples)*np.sum(y_predicted-y)\n", - "\n", - " self.weights -=self.lr*dw\n", - " self.bias -=self.lr*db\n", - "\n", - " def predict(self,X):\n", - " if self.multi == True:\n", - " predicts = []\n", - " for i in range(len(self.b)):\n", - " linear_model = np.dot(X,self.w[i])+self.b[i]\n", - " y_predicted = self._sigmoid(linear_model)\n", - " predicts.append(y_predicted)\n", - " predicts = np.array(predicts)\n", - " predicts = [predicts[:,i] for i in range(len(y_predicted))]\n", - " return self.softmax(predicts)\n", - " else:\n", - " linear_model = np.dot(X,self.weights)+self.bias\n", - " y_predicted = self._sigmoid(linear_model)\n", - " y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]\n", - " return y_predicted_cls\n", - " \n", - "\n", - " def _sigmoid(self,x):\n", - " return 1/(1+np.exp(-x))\n", - " \n", - " def softmax(self,x):\n", - " return np.exp(x)/ np.sum(np.exp(x),axis=1).reshape(-1,1)\n", - " \n", - " def accuracy(self,y_true, y_pred):\n", - " if self.multi == True:\n", - " y_pred = np.argmax(y_pred,axis=1)\n", - " accuracy = np.sum(y_true==y_pred)/len(y_true)\n", - " return accuracy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Binary classification" - ] - }, - { - "cell_type": "code", - 
"execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn import datasets\n", - "import pandas as pd\n", - "bc = datasets.load_breast_cancer()\n", - "X,y = bc.data, bc.target\n", - "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=1234)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "model1 = LogisticRegression()" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" - ] - } - ], - "source": [ - "model1.fit(X_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/max/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:64: RuntimeWarning: overflow encountered in exp\n" - ] - } - ], - "source": [ - "predictions = model1.predict(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9210526315789473" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model1.accuracy(y_test, predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Multiclass Classification" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn import datasets\n", - "import pandas as pd\n", - "bc = datasets.load_iris()\n", - "X,y = bc.data, 
bc.target\n", - "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.4, random_state=1234)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9833333333333333" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model1 = LogisticRegression(multi=True)\n", - "model1.fit(X_train,y_train)\n", - "predictions = model1.predict(X_test)\n", - "model1.accuracy(y_test, predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From e13b388cf5a4770e6c5c15545bad65d01ec039ae Mon Sep 17 00:00:00 2001 From: Matthew <40357546+marx-keyz@users.noreply.github.com> Date: Wed, 22 Apr 2020 02:31:58 +0100 Subject: [PATCH 11/11] Add files via upload --- ...cation and Multiclass Classification.ipynb | 267 ++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb diff --git a/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb b/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary Classification and Multiclass Classification.ipynb new file mode 100644 index 0000000..a9ddae7 --- /dev/null +++ b/Oke_Matthew_Olawale/Logistic Regression ALgorithm for Binary 
Classification and Multiclass Classification.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "class LogisticRegression:\n", + " def __init__(self,lr=0.1, n_iters=1000, multi=False):\n", + " self.lr=lr\n", + " self.n_iters = n_iters\n", + " self.weights = None\n", + " self.bias = None\n", + " self.multi = multi\n", + " #weight and bias for each binary classification during multiclass classification(OVR)\n", + " self.w = []\n", + " self.b = []\n", + " \n", + " def fit(self,X,y):\n", + " n_samples,n_features = X.shape\n", + " \n", + " #training for multi-classification\n", + " if self.multi == True:\n", + " for i in np.unique(y):\n", + " \n", + " #initialize weights\n", + " self.weights = np.zeros(n_features)\n", + " self.bias = 0\n", + " \n", + " #Gradient descent algorithm for each binary classification during multi-class classification\n", + " for _ in range(self.n_iters):\n", + " y_ = [1 if j == i else 0 for j in y]\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " \n", + " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y_))#- (2 *1* self.weights)\n", + " db = (2/n_samples)*np.sum(y_predicted-y_)\n", + " \n", + " #update weights\n", + " self.weights -=self.lr*dw\n", + " self.bias -=self.lr*db\n", + " \n", + " #store binary weights which will be used during prediction(Multi-class)\n", + " self.w.append(self.weights)\n", + " self.b.append(self.bias)\n", + " else:\n", + " #Binary classification training\n", + " self.weights = np.zeros(n_features)\n", + " self.bias = 0\n", + " \n", + " #gradient descent optimization\n", + " for _ in range(self.n_iters):\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " \n", + " #Calculate gradients for weights and bias\n", + " dw = (1/n_samples)*np.dot(X.T,(y_predicted-y))#- (2 *1* self.weights)\n", + " db =
(2/n_samples)*np.sum(y_predicted-y)\n", + " \n", + " #update weights and biases\n", + " self.weights -=self.lr*dw\n", + " self.bias -=self.lr*db\n", + "\n", + " def predict(self,X):\n", + " #Multi-class predictions\n", + " if self.multi == True:\n", + " predicts = []\n", + " for i in range(len(self.b)):\n", + " linear_model = np.dot(X,self.w[i])+self.b[i]\n", + " y_predicted = self._sigmoid(linear_model)\n", + " \n", + " #store prediction for each binary-class predictions\n", + " predicts.append(y_predicted)\n", + " predicts = np.array(predicts)\n", + " \n", + " #collate the prediction for each data point\n", + " predicts = [predicts[:,i] for i in range(len(y_predicted))]\n", + " \n", + " #return softmax to sum probabilities of predictions to 1\n", + " return self.softmax(predicts)\n", + " \n", + " else:\n", + " #binary predictions\n", + " linear_model = np.dot(X,self.weights)+self.bias\n", + " y_predicted = self._sigmoid(linear_model)\n", + " y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]\n", + " return y_predicted_cls\n", + " \n", + "\n", + " def _sigmoid(self,x):\n", + " return 1/(1+np.exp(-x))\n", + " \n", + " def softmax(self,x):\n", + " return np.exp(x)/ np.sum(np.exp(x),axis=1).reshape(-1,1)\n", + " \n", + " #simple accuracy calculation based on amount of model's right predictions compared to label\n", + " def accuracy(self,y_true, y_pred):\n", + " if self.multi == True:\n", + " y_pred = np.argmax(y_pred,axis=1)\n", + " accuracy = np.sum(y_true==y_pred)/len(y_true)\n", + " return accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Binary classification" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#Testing model on the breast cancer dataset provided by sklearn\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn import datasets\n", + "import pandas as pd\n", + "bc = datasets.load_breast_cancer()\n", + "X,y = bc.data, 
bc.target\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=1234)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "model1 = LogisticRegression(lr = 0.0001, n_iters=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#fit training data\n", + "model1.fit(X_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#make predictions\n", + "predictions = model1.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9298245614035088" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#calculate accuracy\n", + "model1.accuracy(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiclass Classification" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#Testing model on the iris dataset with three different classes\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn import datasets\n", + "import pandas as pd\n", + "bc = datasets.load_iris()\n", + "X,y = bc.data, bc.target\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.4, random_state=1234)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9833333333333333" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model1 = LogisticRegression(multi=True)\n", + "model1.fit(X_train,y_train)\n", + "predictions = 
model1.predict(X_test)\n", + "model1.accuracy(y_test, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}