Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Olaniyan Oluwasegun Emmanuel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Logistic Regression from scratch
The code is implemented in Python
## Methodology
Below are the methods used in writing Logistic Regression from scratch

1. Initialize a class Logistic Regression
Logistic Regression was first used in the biological sciences in the early twentieth century. It was then used in many social science applications. It is used when the dependent variable (target) is categorical.
For example,
* To predict whether an email is spam (1) or not (0)
* To predict whether a tumor is malignant (1) or not (0)

The class is initialized with two attributes: the learning rate (`lr`) and the number of iterations (`n_iters`).
The amount that the weights are updated during training is referred to as the step size or the “learning rate.”
The bias value allows the activation function to be shifted to the left or right, to better fit the data.

2. Sigmoid Function
The formula for the linear function, which produces continuous values, is:
f(w,b) = wx + b
where w is the weight, b is the bias and x is the data point. Since logistic regression is used when the dependent variable (target) is categorical, the sigmoid function converts the output of the linear function into a probability between 0 and 1.
The sigmoid function is sigmoid(z) = 1 / (1 + e^(-z)), where z = wx + b

3. Gradient Descent
Gradient Descent iteratively updates the weights and the bias: at every iteration the gradient gives the direction of the update, and the learning rate controls how large a step is taken in that direction.

130 changes: 130 additions & 0 deletions Olaniyan Oluwasegun Emmanuel/logistic_regression.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"\n",
"class LogisticRegression:\n",
"    \"\"\"Binary logistic regression fitted with batch gradient descent.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    lr : float\n",
"        Learning rate, i.e. the step size of every parameter update.\n",
"    n_iters : int\n",
"        Number of gradient-descent iterations performed by ``fit``.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, lr=0.001, n_iters=1000):\n",
"        self.lr = lr\n",
"        self.n_iters = n_iters\n",
"        # learned parameters; populated by fit()\n",
"        self.weights = None\n",
"        self.bias = None\n",
"\n",
"    def _sigmoid(self, x):\n",
"        \"\"\"Map the linear output ``x`` to probabilities in (0, 1).\"\"\"\n",
"        return 1 / (1 + np.exp(-x))\n",
"\n",
"    def fit(self, X, y):\n",
"        \"\"\"Train the model on the training data.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        X : array-like, shape = [n_samples, n_features]\n",
"            Training samples.\n",
"        y : array-like, shape = [n_samples]\n",
"            Target classes encoded as 0 or 1.\n",
"\n",
"        Returns\n",
"        -------\n",
"        self : the fitted instance\n",
"        \"\"\"\n",
"        # accept any array-like input, as the docstring promises\n",
"        X = np.asarray(X)\n",
"        y = np.asarray(y)\n",
"\n",
"        # start from all-zero parameters\n",
"        n_samples, n_features = X.shape\n",
"        self.weights = np.zeros(n_features)\n",
"        self.bias = 0\n",
"\n",
"        # gradient descent to update the weights and the bias\n",
"        for _ in range(self.n_iters):\n",
"            linear_model = np.dot(X, self.weights) + self.bias\n",
"            y_pred = self._sigmoid(linear_model)\n",
"\n",
"            # update rules: average the prediction error over all samples\n",
"            error = y_pred - y\n",
"            dw = (1 / n_samples) * np.dot(X.T, error)\n",
"            db = (1 / n_samples) * np.sum(error)\n",
"\n",
"            self.weights -= self.lr * dw\n",
"            self.bias -= self.lr * db\n",
"\n",
"        return self\n",
"\n",
"    def predict(self, X):\n",
"        \"\"\"Predict the class labels.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        X : array-like, shape = [n_samples, n_features]\n",
"            Test samples.\n",
"\n",
"        Returns\n",
"        -------\n",
"        list of int\n",
"            1 where the predicted probability is above 0.5, otherwise 0.\n",
"        \"\"\"\n",
"        linear_model = np.dot(X, self.weights) + self.bias\n",
"        probabilities = self._sigmoid(linear_model)\n",
"        return [1 if p > 0.5 else 0 for p in probabilities]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy is 0.93\n"
]
}
],
"source": [
"# using the breast cancer dataset to test the model\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.datasets import load_breast_cancer\n",
"from logistic_regression import LogisticRegression\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"dataset = load_breast_cancer()\n",
"X, y = dataset.data, dataset.target\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=1234\n",
")\n",
"log = LogisticRegression(lr=0.0001, n_iters=1000)\n",
"log.fit(X_train, y_train)\n",
"pred = log.predict(X_test)\n",
"\n",
"# accuracy_score takes the true labels first, then the predictions\n",
"print('Accuracy is %.2f' % accuracy_score(y_test, pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}