113 changes: 113 additions & 0 deletions LogReg.ipynb
@@ -0,0 +1,113 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#importing the necessary library\n",
"import numpy as np"
]
},
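{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the class below minimises the binary cross-entropy cost $J(\\theta) = -\\frac{1}{m}\\sum_i \\big[y_i \\log h_i + (1 - y_i)\\log(1 - h_i)\\big]$ with $h = \\sigma(X\\theta)$; its gradient is $\\nabla_\\theta J = \\frac{1}{m} X^T (h - y)$, which is the update step implemented in `fit`."
]
},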
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class LogisticRegression:\n",
" def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, verbose=False):\n",
" self.lr = lr\n",
" self.num_iter = num_iter\n",
" self.fit_intercept = fit_intercept\n",
" \n",
" def __add_intercept(self, X):\n",
" intercept = np.ones((X.shape[0], 1))\n",
" return np.concatenate((intercept, X), axis=1)\n",
" \n",
" #Activation function function otherwise known as the sigmoid function,\n",
" #helps to map predicted values to probabilities \n",
" #defining the activation function and storing it with variable named af\n",
" \n",
" def activation_fn(self,x):\n",
" af = 1.0/(1+np.exp(-x))\n",
" return af\n",
" \n",
" #the cost function is also known as the error function\n",
" #this was calculated using the mean absolute error method\n",
" \n",
" def cost_fn(self,features,labels):\n",
" labels = y\n",
" z = len(labels)\n",
" output = activation_fn(cost_fn)\n",
" \n",
" #take the error when y = 1\n",
" class1_cost = -y * np.log(output)\n",
" \n",
" #take the error when y = 0\n",
" class2_cost = (1-y) * np.log(output)\n",
" \n",
" #take the sum of the cost\n",
" cost = class1_cost + class2_cost\n",
" \n",
" #take the average cost\n",
" avg_cost = cost/z\n",
" \n",
" return avg_cost\n",
" \n",
" #In other to minimise cost or error, we use the gradient descent for optimization.\n",
"\n",
" def fit(self, X, y):\n",
" if self.fit_intercept:\n",
" X = self.__add_intercept(X)\n",
" \n",
" # weights initialization\n",
" self.theta = np.zeros(X.shape[1])\n",
" \n",
" for i in range(self.num_iter):\n",
" z = np.dot(X, self.theta)\n",
" h = self.__sigmoid(z)\n",
" gradient = np.dot(X.T, (h - y)) / y.size\n",
" self.theta -= self.lr * gradient\n",
" \n",
" if(self.verbose == True and i % 10000 == 0):\n",
" z = np.dot(X, self.theta)\n",
" h = self.__sigmoid(z)\n",
" print(f'loss: {self.__loss(h, y)} \\t')\n",
" \n",
" # Mapping probabilities to classes i.e assigning class labels to the predicted probabilties\n",
" # defining the decision boundary for class label assigning or mapping\n",
" def probability(self, X):\n",
" if self.fit_intercept:\n",
" X = self.__add_intercept(X)\n",
" return self.activation_fn(np.dot(X, self))\n",
" \n",
" #the prediction function returns a 1D array of probabilities\n",
" def prediction(self, X, threshold):\n",
" return self.predict_prob(X)>=threshold"
]
}
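,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal usage sketch: `X_demo` and `y_demo` are made-up illustrative arrays, and the hyperparameters are arbitrary choices, not part of the original submission."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#hypothetical demo: fit the model on a tiny linearly separable dataset\n",
"X_demo = np.array([[0.5], [1.0], [1.5], [3.0], [3.5], [4.0]])\n",
"y_demo = np.array([0, 0, 0, 1, 1, 1])\n",
"\n",
"model = LogisticRegression(lr=0.1, num_iter=10000)\n",
"model.fit(X_demo, y_demo)\n",
"\n",
"print(model.probability(X_demo))   #predicted probabilities, one per row of X_demo\n",
"print(model.prediction(X_demo, 0.5))  #boolean class labels at the 0.5 threshold"
]
}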
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
12 changes: 12 additions & 0 deletions README.md.txt
@@ -0,0 +1,12 @@
DEVELOPING A LOGISTIC REGRESSION ALGORITHM FROM SCRATCH USING PYTHON

This project is about creating a logistic regression algorithm from scratch. The task allowed either Python or R, and I chose Python as my programming language.

THINGS YOU NEED:

1. You will need knowledge of Python programming and of the underlying mathematics and statistics, especially linear regression and the sigmoid function (see the formula after this list).
2. You will need Python installed on your system with any IDE of your choice, preferably Jupyter Notebook.
3. The NumPy library is required to run this code.
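
For reference, the sigmoid function used throughout the code maps any real number z to a probability between 0 and 1:

    sigmoid(z) = 1 / (1 + exp(-z))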

PS: This algorithm is open to correction and I welcome any meaningful contribution.
