Skip to content

Commit dc9ecdb

Browse files
add xgbregressor
1 parent 3a414a4 commit dc9ecdb

File tree

5 files changed

+87
-6
lines changed

5 files changed

+87
-6
lines changed

python/src/lazylearn/lazylearn.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,13 @@ def run_autopilot(self):
5050
5151
:return:
5252
"""
53-
sb = StrategyBuilder(task=self.task, dataset=self.dataset, target=self.target, random_state=self.random_state)
53+
sb = StrategyBuilder(
54+
task=self.task,
55+
dataset=self.dataset,
56+
target=self.target,
57+
random_state=self.random_state,
58+
)
5459
self.leaderboard = sorted([model for model in sb.models], key=lambda x: x.score)
5560

5661
def get_leaderboard(self):
    """Summarise the leaderboard as a list of ``(name, score)`` tuples.

    The entries are emitted in the order they are stored in
    ``self.leaderboard``; no re-sorting happens here.

    :return: list of ``(name, score)`` tuples, one per leaderboard entry
    """
    summary = []
    for entry in self.leaderboard:
        summary.append((entry.name, entry.score))
    return summary
58-
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from models.models import Dataset
2+
from pipeline.pipeline import RegressionPipeline
3+
from preprocessing.encoding.encoders import OrdinalConverter
4+
from regression.models.xgboost.xgb_regressor_steps.regressor_step import XGBRegressorStep
5+
6+
7+
class XGBRegressionRunner:
    """Strategy runner that wires an XGBoost regressor into a pipeline.

    On construction the runner creates a ``RegressionPipeline`` and loads
    it with the "train" and "test" partitions of the supplied dataset.
    ``fit`` then registers the pipeline steps and trains them, and
    ``predict`` scores arbitrary feature data through the same pipeline.
    """

    def __init__(self, target, dataset, random_state=None):
        """Create the runner and populate its pipeline with partition data.

        :param target: name of the target column in the dataset partitions
        :param dataset: dataset exposing ``partitions`` (with "train" and
            "test" entries) and ``type_collections``
        :param random_state: seed forwarded to the XGBoost step in ``fit``
        """
        self.name = "XGBRegressor"
        self.target = target
        self.dataset: Dataset = dataset
        self.random_state = random_state

        pipeline = RegressionPipeline()
        pipeline.target = target

        # The feature frames are copies of the full partitions; the target
        # series are read from the original (uncopied) partitions.
        train_frame = self.dataset.partitions["train"]
        pipeline.train_features_df = train_frame.copy()
        pipeline.train_targets = train_frame[target]

        holdout_frame = self.dataset.partitions["test"]
        pipeline.holdout_features_df = holdout_frame.copy()
        pipeline.holdout_targets = holdout_frame[target]

        self.pipeline = pipeline

    def fit(self):
        """Register the pipeline steps and fit the pipeline.

        The numeric columns are appended to the pipeline's feature list,
        then an ``OrdinalConverter`` for the categorical columns and an
        ``XGBRegressorStep`` are added, in that order, before the pipeline
        itself is fitted.
        """
        type_collections = self.dataset.type_collections
        categorical = type_collections["categorical"]
        numeric = type_collections["numeric"]

        pipeline = self.pipeline
        pipeline.feature_list.extend(numeric)
        pipeline.add(OrdinalConverter(cat_vars=categorical))
        pipeline.add(XGBRegressorStep(random_state=self.random_state))
        pipeline.fit()

    def predict(self, features):
        """Run ``features`` through the pipeline and return its prediction.

        :param features: data to score; stored on the pipeline as
            ``tmp_test`` before the pipeline's ``predict`` is invoked
        :return: whatever the pipeline's ``predict`` returns
        """
        pipeline = self.pipeline
        pipeline.tmp_test = features
        predictions = pipeline.predict()
        return predictions

python/src/lazylearn/regression/models/xgboost/xgb_regressor_steps/__init__.py

Whitespace-only changes.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from pipeline.pipeline import PipelineStep, RegressionPipeline
2+
from xgboost import XGBRegressor
3+
4+
5+
class XGBRegressorStep(PipelineStep):
    """Pipeline step that fits and applies an ``XGBRegressor``."""

    def __init__(self, random_state=None):
        """Build the underlying regressor with 1000 estimators.

        :param random_state: seed passed through to ``XGBRegressor``
        """
        self.regressor = XGBRegressor(
            n_estimators=1000,
            random_state=random_state,
        )

    def fit(self, pipeline: RegressionPipeline):
        """Train the regressor on the pipeline's training data.

        The pipeline's ``feature_list`` is first rewritten without the
        target column, so the target is never used as a feature; the
        filtered list stays on the pipeline and is reused by ``predict``.

        :param pipeline: pipeline holding ``feature_list``, ``target``,
            ``train_features_df`` and ``train_targets``
        """
        features = []
        for column in pipeline.feature_list:
            if column != pipeline.target:
                features.append(column)
        pipeline.feature_list = features

        print("Fitting XGBRegressor")
        train_matrix = pipeline.train_features_df[pipeline.feature_list]
        self.regressor.fit(X=train_matrix, y=pipeline.train_targets)
        print("XGBRegressor fitted!")

    def predict(self, pipeline: RegressionPipeline):
        """Score ``pipeline.tmp_test`` and store the result on the pipeline.

        Only the columns named in ``pipeline.feature_list`` are passed to
        the regressor; the predictions are written to ``pipeline.tmp_pred``
        and nothing is returned.

        :param pipeline: pipeline holding ``tmp_test`` and ``feature_list``
        """
        scoring_matrix = pipeline.tmp_test[pipeline.feature_list]
        pipeline.tmp_pred = self.regressor.predict(X=scoring_matrix)

python/src/lazylearn/strategies/strategy_builder.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
from sklearn.metrics import mean_absolute_error
2-
31
from models.models import Model
42
from regression.models.randomforest.randomforest import RandomForestRegressionRunner
3+
from regression.models.xgboost.xgb import XGBRegressionRunner
4+
from sklearn.metrics import mean_absolute_error
5+
56

67
class StrategyBuilder:
78
def __init__(self, task, dataset, target, random_state=None):
@@ -16,14 +17,23 @@ def __init__(self, task, dataset, target, random_state=None):
1617
self.start()
1718

1819
def build(self):
    """Populate ``self.strategies`` with the runners for ``self.task``.

    For the "regression" task an ``XGBRegressionRunner`` is appended
    first, followed by a ``RandomForestRegressionRunner``; both receive
    the builder's target, dataset and random state.

    :raises ValueError: if ``self.task`` is anything other than
        "regression"
    """
    # TODO: Make sure pipelines are not overwriting data for each other
    if self.task != "regression":
        raise ValueError("Unsupported task!")

    # Order matters: XGBoost is registered before the random forest.
    for runner_cls in (XGBRegressionRunner, RandomForestRegressionRunner):
        self.strategies.append(
            runner_cls(
                target=self.target,
                dataset=self.dataset,
                random_state=self.random_state,
            )
        )
2939

@@ -38,5 +48,10 @@ def start(self):
3848
strategy.pipeline.tmp_pred,
3949
)
4050

41-
self.models.append(Model(name=strategy.name, score=strategy.pipeline.holdout_score, pipeline=strategy))
42-
51+
self.models.append(
52+
Model(
53+
name=strategy.name,
54+
score=strategy.pipeline.holdout_score,
55+
pipeline=strategy,
56+
)
57+
)

0 commit comments

Comments
 (0)