diff --git a/LogReg.ipynb b/LogReg.ipynb
new file mode 100644
index 0000000..32fa67d
--- /dev/null
+++ b/LogReg.ipynb
@@ -0,0 +1,113 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# importing the necessary library\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class LogisticRegression:\n",
+    "    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, verbose=False):\n",
+    "        self.lr = lr\n",
+    "        self.num_iter = num_iter\n",
+    "        self.fit_intercept = fit_intercept\n",
+    "        self.verbose = verbose\n",
+    "    \n",
+    "    def __add_intercept(self, X):\n",
+    "        # prepend a column of ones so the first weight serves as the bias term\n",
+    "        intercept = np.ones((X.shape[0], 1))\n",
+    "        return np.concatenate((intercept, X), axis=1)\n",
+    "    \n",
+    "    # The activation function, otherwise known as the sigmoid function,\n",
+    "    # maps predicted values to probabilities between 0 and 1.\n",
+    "    def activation_fn(self, x):\n",
+    "        return 1.0 / (1 + np.exp(-x))\n",
+    "    \n",
+    "    # The cost function is also known as the error function.\n",
+    "    # Logistic regression uses the cross-entropy (log) loss, averaged\n",
+    "    # over all samples.\n",
+    "    def cost_fn(self, features, labels):\n",
+    "        n = len(labels)\n",
+    "        output = self.activation_fn(np.dot(features, self.theta))\n",
+    "    \n",
+    "        # the error when y = 1\n",
+    "        class1_cost = -labels * np.log(output)\n",
+    "    \n",
+    "        # the error when y = 0\n",
+    "        class2_cost = -(1 - labels) * np.log(1 - output)\n",
+    "    \n",
+    "        # take the sum of the cost\n",
+    "        cost = class1_cost + class2_cost\n",
+    "    \n",
+    "        # take the average cost\n",
+    "        return np.sum(cost) / n\n",
+    "    \n",
+    "    # In order to minimise the cost (error), we use gradient descent for optimisation.\n",
+    "    def fit(self, X, y):\n",
+    "        if self.fit_intercept:\n",
+    "            X = self.__add_intercept(X)\n",
+    "    \n",
+    "        # weights initialization\n",
+    "        self.theta = np.zeros(X.shape[1])\n",
+    "    \n",
+    "        for i in range(self.num_iter):\n",
+    "            z = np.dot(X, self.theta)\n",
+    "            h = self.activation_fn(z)\n",
+    "            gradient = np.dot(X.T, (h - y)) / y.size\n",
+    "            self.theta -= self.lr * gradient\n",
+    "    \n",
+    "            if self.verbose and i % 10000 == 0:\n",
+    "                print(f'loss: {self.cost_fn(X, y)}')\n",
+    "    \n",
+    "    # the probability function returns a 1D array of predicted probabilities\n",
+    "    def probability(self, X):\n",
+    "        if self.fit_intercept:\n",
+    "            X = self.__add_intercept(X)\n",
+    "        return self.activation_fn(np.dot(X, self.theta))\n",
+    "    \n",
+    "    # Mapping probabilities to classes, i.e. assigning class labels to the\n",
+    "    # predicted probabilities; the threshold defines the decision boundary.\n",
+    "    def prediction(self, X, threshold=0.5):\n",
+    "        return self.probability(X) >= threshold"
+   ]
+  },
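+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A minimal usage sketch: fit the classifier on a small synthetic dataset\n",
+    "# and check its training accuracy. The data below is made up purely for\n",
+    "# illustration; any binary-labelled feature matrix would work.\n",
+    "np.random.seed(0)\n",
+    "X = np.random.randn(100, 2)\n",
+    "# label a point 1 when the sum of its two features is positive\n",
+    "y = (X[:, 0] + X[:, 1] > 0).astype(float)\n",
+    "\n",
+    "model = LogisticRegression(lr=0.1, num_iter=10000)\n",
+    "model.fit(X, y)\n",
+    "\n",
+    "preds = model.prediction(X, threshold=0.5)\n",
+    "print('training accuracy:', (preds == y).mean())"
+   ]
+  }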
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/README.md.txt b/README.md.txt
new file mode 100644
index 0000000..5c0a313
--- /dev/null
+++ b/README.md.txt
@@ -0,0 +1,12 @@
+DEVELOPING A LOGISTIC REGRESSION ALGORITHM FROM SCRATCH USING PYTHON
+
+This project is about creating a logistic regression algorithm from scratch. It could be written in either Python or R, but I chose Python as my programming language.
+
+THINGS YOU NEED:
+
+1. Knowledge of Python programming and mathematics, especially statistics, linear regression, and the sigmoid function.
+2. Python installed on your system, with any IDE of your choice, preferably Jupyter Notebook.
+3. The NumPy library, which is essential for executing this code (see the example install command below).
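+
+   For example, NumPy can be installed from the command line with:
+
+   pip install numpy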
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/charles_precious/README.md.txt b/charles_precious/README.md.txt new file mode 100644 index 0000000..5c0a313 --- /dev/null +++ b/charles_precious/README.md.txt @@ -0,0 +1,12 @@ +DEVELOPING A LOGISTICS REGRESSION ALGORITHM FROM SCRATCH USING PYTHON + +This project is about creating a logistic regression algorithgm using either python or R, but i choose to use python as my programming language. + +THINGS YOU NEED: + +1. You will need knowledge of python programming, statistics expecially linear regression and sigmoid function, mathematics. +2. You will need to have python installed in your system with any IDE of your choice, preferrably Jupyter notebook. +3. Numpy library is an important tool in executing this code. + +PS: This algorithm is open to correction and I welcome any meaninful contribution. +