diff --git a/Olaniyan Oluwasegun Emmanuel/README.md b/Olaniyan Oluwasegun Emmanuel/README.md new file mode 100644 index 0000000..04e5463 --- /dev/null +++ b/Olaniyan Oluwasegun Emmanuel/README.md @@ -0,0 +1,24 @@ +# Logistic Regression from scratch +The code is implemented in Python. +## Methodology +Below are the methods used in writing Logistic Regression from scratch. + +1. Initialize a class Logistic Regression +Logistic Regression was used in the biological sciences in the early twentieth century. It was then used in many social science applications. It is used when the dependent variable (target) is categorical. +For example, +- To predict whether an email is spam (1) or not (0) +- To predict whether a tumor is malignant (1) or not (0) + +The class is initialized with the attributes learning rate (lr) and number of iterations (n_iters). +The amount by which the weights are updated during training is referred to as the step size or the “learning rate.” +The bias value allows the activation function to be shifted to the left or right, to better fit the data. + +2. Sigmoid Function +The formula for the linear function, which produces continuous values, is: + f(w,b) = wx + b +where w is the weight, b is the bias and x is the data point. Since logistic regression is used when the dependent variable (target) is categorical, the sigmoid function is used to convert the output of the linear function to probabilities, i.e. values between 0 and 1. +The sigmoid function is s(z) = 1 / (1 + e^(-z)), where z = f(w,b). + +3. Gradient Descent +Gradient Descent is used to iteratively update the weights and the bias; the learning rate determines how large a step is taken in the direction of the negative gradient at each iteration. 
import numpy as np


class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    lr : float, default=0.001
        Learning rate; the step size applied to every gradient update.
    n_iters : int, default=1000
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Per-feature coefficients, shape ``(n_features,)``. ``None`` until
        ``fit`` has been called.
    bias : float or None
        Intercept term. ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def _sigmoid(self, x):
        """Map the linear output ``x`` to probabilities in (0, 1).

        The input is clipped to [-500, 500] before exponentiation so that
        ``np.exp`` cannot overflow on large-magnitude inputs (e.g. when the
        features are not scaled); the clipped tails are already
        indistinguishable from 0 and 1 in double precision.
        """
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def fit(self, X, y):
        """Train the model on the given samples.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples.
        y : array-like, shape (n_samples,)
            Target classes encoded as 0 or 1.

        Returns
        -------
        self : LogisticRegression
            The fitted estimator, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Start from an all-zero model.
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Batch gradient descent on the mean log-loss.
        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self._sigmoid(linear_model)

            # The residual is shared by both gradients.
            error = y_pred - y
            dw = np.dot(X.T, error) / n_samples
            db = np.sum(error) / n_samples

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

        return self

    def predict(self, X):
        """Predict class labels for the given samples.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        list of int
            ``1`` where the predicted probability is strictly greater than
            0.5, otherwise ``0``.
        """
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self._sigmoid(linear_model)
        return [1 if p > 0.5 else 0 for p in y_pred]


if __name__ == "__main__":
    # Demo: evaluate the model on the breast cancer dataset.
    # The class defined above is used directly; the original notebook cell
    # re-imported it from a separate ``logistic_regression`` module, which
    # shadowed this definition.
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split

    data = load_breast_cancer()
    X, y = data.data, data.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234
    )
    model = LogisticRegression(lr=0.0001, n_iters=1000)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    # accuracy_score expects (y_true, y_pred).
    print(f"Accuracy is {accuracy_score(y_test, pred):.2f}")