diff --git a/Credit Card Fraud Detection Using Machine Learning/Code/Credit Card Fraud Detection - XGBoost.ipynb b/Credit Card Fraud Detection Using Machine Learning/Code/Credit Card Fraud Detection - XGBoost.ipynb
new file mode 100644
index 0000000..24a1c24
--- /dev/null
+++ b/Credit Card Fraud Detection Using Machine Learning/Code/Credit Card Fraud Detection - XGBoost.ipynb
@@ -0,0 +1,258 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "md-title",
+   "metadata": {},
+   "source": [
+    "# Credit Card Fraud Detection - XGBoost\n",
+    "\n",
+    "Trains an `xgboost.XGBClassifier` on the transactions in `creditcard.csv` and evaluates it on a stratified 10% hold-out set with a confusion matrix, a ROC curve and a precision-recall curve.\n",
+    "\n",
+    "Outputs have been cleared because the cells were reorganised; run *Restart Kernel -> Run All* to regenerate them. The previously committed run reported a hold-out confusion matrix of `[[28429, 3], [6, 43]]` (rows = actual, columns = predicted, class order `Non_Fraud`, `Fraud`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d66194a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "import xgboost as xgb\n",
+    "from sklearn.metrics import (\n",
+    "    auc,\n",
+    "    average_precision_score,\n",
+    "    confusion_matrix,\n",
+    "    precision_recall_curve,\n",
+    "    roc_curve,\n",
+    ")\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cfg-constants",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dataset location. Set the CREDITCARD_CSV environment variable to point at the\n",
+    "# file instead of hard-coding a machine-specific absolute path.\n",
+    "DATA_PATH = Path(os.environ.get('CREDITCARD_CSV', 'creditcard.csv'))\n",
+    "\n",
+    "TARGET_COL = 'Class'\n",
+    "DROPPED_COLS = [TARGET_COL, 'Time', 'Amount']  # excluded from the feature matrix\n",
+    "CLASS_LABELS = ['Non_Fraud', 'Fraud']  # display names for classes 0 and 1\n",
+    "\n",
+    "TEST_SIZE = 0.1  # fraction of rows held out for evaluation\n",
+    "SEED = 42  # seed for the train/test split\n",
+    "MAX_DEPTH = 5  # XGBClassifier max_depth\n",
+    "SCALE_POS_WEIGHT = 100  # XGBClassifier scale_pos_weight (up-weights class 1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-load",
+   "metadata": {},
+   "source": [
+    "## Load data\n",
+    "\n",
+    "The previously committed run showed 31 columns (`Time`, `V1`-`V28`, `Amount`, `Class`) and class counts of 284315 (`0`) and 492 (`1`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a406b74c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not DATA_PATH.is_file():\n",
+    "    raise FileNotFoundError(\n",
+    "        f'{DATA_PATH} not found; set the CREDITCARD_CSV environment variable to the dataset path.'\n",
+    "    )\n",
+    "\n",
+    "transactions = pd.read_csv(DATA_PATH)\n",
+    "\n",
+    "missing_cols = set(DROPPED_COLS) - set(transactions.columns)\n",
+    "assert not missing_cols, f'missing expected columns: {sorted(missing_cols)}'\n",
+    "\n",
+    "transactions.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d49b8848",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transactions[TARGET_COL].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-split",
+   "metadata": {},
+   "source": [
+    "## Features and hold-out split\n",
+    "\n",
+    "The split is stratified on the target so both parts keep the same class ratio."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf488eb3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_xgb = transactions.drop(columns=DROPPED_COLS)\n",
+    "y_xgb = transactions[TARGET_COL]\n",
+    "\n",
+    "X_train_xgb, X_test_xgb, y_train_xgb, y_test_xgb = train_test_split(\n",
+    "    X_xgb, y_xgb, test_size=TEST_SIZE, random_state=SEED, stratify=y_xgb\n",
+    ")\n",
+    "assert len(X_train_xgb) + len(X_test_xgb) == len(X_xgb)\n",
+    "\n",
+    "X_train_xgb.shape, X_test_xgb.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-train",
+   "metadata": {},
+   "source": [
+    "## Train the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "327bf0da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xgb_model = xgb.XGBClassifier(max_depth=MAX_DEPTH, scale_pos_weight=SCALE_POS_WEIGHT)\n",
+    "xgb_model.fit(X_train_xgb, y_train_xgb);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-evaluate",
+   "metadata": {},
+   "source": [
+    "## Evaluate on the hold-out set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d2a1a48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred_xgb = xgb_model.predict(X_test_xgb)\n",
+    "y_pred_xgb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "068e29b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# labels=[0, 1] pins the row/column order so it always matches CLASS_LABELS.\n",
+    "conf_matrix_xgb = confusion_matrix(y_test_xgb, y_pred_xgb, labels=[0, 1])\n",
+    "conf_matrix_xgb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5c3279a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(figsize=(6, 6))\n",
+    "sns.heatmap(\n",
+    "    conf_matrix_xgb,\n",
+    "    xticklabels=CLASS_LABELS,\n",
+    "    yticklabels=CLASS_LABELS,\n",
+    "    annot=True,\n",
+    "    fmt='d',\n",
+    "    ax=ax,\n",
+    ")\n",
+    "ax.set(title='Confusion Matrix', xlabel='XGB_Prediction', ylabel='Actual')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fea3c39f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Predicted probability of class 1 (Fraud) for each hold-out row.\n",
+    "y_scores_xgb = xgb_model.predict_proba(X_test_xgb)[:, 1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ded525fd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ROC curve\n",
+    "fpr, tpr, _ = roc_curve(y_test_xgb, y_scores_xgb)\n",
+    "roc_auc = auc(fpr, tpr)\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(8, 6))\n",
+    "ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.4f})', color='blue')\n",
+    "ax.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance-level reference line\n",
+    "ax.set(xlabel='False Positive Rate', ylabel='True Positive Rate', title='ROC Curve')\n",
+    "ax.legend()\n",
+    "ax.grid(True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a4193624",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Precision-recall curve\n",
+    "precision, recall, _ = precision_recall_curve(y_test_xgb, y_scores_xgb)\n",
+    "average_precision = average_precision_score(y_test_xgb, y_scores_xgb)\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(8, 6))\n",
+    "ax.plot(recall, precision, label=f'Precision-Recall (AP = {average_precision:.4f})', color='green')\n",
+    "ax.set(xlabel='Recall', ylabel='Precision', title='Precision-Recall Curve')\n",
+    "ax.legend()\n",
+    "ax.grid(True)\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}