From cdc9fdbe4db1714fd22c5c785a7752d245442182 Mon Sep 17 00:00:00 2001 From: Elon-Lau <75466256+Elon-Lau@users.noreply.github.com> Date: Sat, 27 May 2023 01:23:09 +0800 Subject: [PATCH] fix(ctr): add a logistic regression ctr ml model. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在CTR目录中添加逻辑回归点击率预估模型的python代码。 --- .idea/.gitignore | 8 ++++ .idea/RecSystem-Pytorch.iml | 12 ++++++ .idea/deployment.xml | 21 +++++++++++ .idea/inspectionProfiles/Project_Default.xml | 6 +++ .../inspectionProfiles/profiles_settings.xml | 6 +++ .idea/misc.xml | 4 ++ .idea/modules.xml | 8 ++++ .idea/vcs.xml | 6 +++ ctr/deepfm.py | 0 ctr/fm.py | 0 ctr/lightgbm.py | 0 ctr/lr.py | 37 +++++++++++++++++++ 12 files changed, 108 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/RecSystem-Pytorch.iml create mode 100644 .idea/deployment.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 ctr/deepfm.py create mode 100644 ctr/fm.py create mode 100644 ctr/lightgbm.py create mode 100644 ctr/lr.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/RecSystem-Pytorch.iml b/.idea/RecSystem-Pytorch.iml new file mode 100644 index 0000000..8b8c395 --- /dev/null +++ b/.idea/RecSystem-Pytorch.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/deployment.xml b/.idea/deployment.xml new file mode 100644 index 0000000..d4913db --- /dev/null +++ b/.idea/deployment.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + 
+ + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..cd83845 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..c3334de --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..825d168 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ctr/deepfm.py b/ctr/deepfm.py new file mode 100644 index 0000000..e69de29 diff --git a/ctr/fm.py b/ctr/fm.py new file mode 100644 index 0000000..e69de29 diff --git a/ctr/lightgbm.py b/ctr/lightgbm.py new file mode 100644 index 0000000..e69de29 diff --git a/ctr/lr.py b/ctr/lr.py new file mode 100644 index 0000000..61b80d3 --- /dev/null +++ b/ctr/lr.py @@ -0,0 +1,37 @@ +import numpy as np +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score, roc_auc_score + +# 创建虚拟数据 +data = pd.DataFrame({ + 'feature1': np.random.rand(1000), + 'feature2': np.random.rand(1000), + 'feature3': np.random.rand(1000), + 'clicked': np.random.randint(0, 2, 1000) +}) + +# 定义特征和目标变量 +X = data[['feature1', 'feature2', 'feature3']] +y = data['clicked'] + +# 
Split the data into training and test sets (80/20; the fixed random_state makes the split itself repeatable)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Create the logistic regression model with scikit-learn's default settings
+log_reg = LogisticRegression()
+
+# Fit the model on the training split
+log_reg.fit(X_train, y_train)
+
+# Predict hard 0/1 labels, and the probability of class 1 (clicked), on the test split
+y_pred = log_reg.predict(X_test)
+y_pred_proba = log_reg.predict_proba(X_test)[:, 1]
+
+# Accuracy is computed from the hard labels, AUC from the class-1 probabilities
+accuracy = accuracy_score(y_test, y_pred)
+auc = roc_auc_score(y_test, y_pred_proba)
+
+print(f"Accuracy: {accuracy}")
+print(f"AUC: {auc}")