
Commit 9573358

fix precommit and add test changes
1 parent adc26d5 commit 9573358

File tree: 20 files changed, +94 −155 lines

autoPyTorch/api/base_task.py

Lines changed: 5 additions & 4 deletions
@@ -111,7 +111,7 @@ def send_warnings_to_log(
     return prediction


-def get_search_updates(categorical_indicator: List[bool]):
+def get_search_updates(categorical_indicator: List[bool]) -> HyperparameterSearchSpaceUpdates:
     """
     These updates mimic the autopytorch tabular paper.
     Returns:
@@ -120,8 +120,8 @@ def get_search_updates(categorical_indicator: List[bool]):
         The search space updates like setting different hps to different values or ranges.
     """

-    has_cat_features = any(categorical_indicator)
-    has_numerical_features = not all(categorical_indicator)
+    # has_cat_features = any(categorical_indicator)
+    # has_numerical_features = not all(categorical_indicator)

     search_space_updates = HyperparameterSearchSpaceUpdates()

@@ -267,7 +267,8 @@ def __init__(
         self.input_validator: Optional[BaseInputValidator] = None

-        self.search_space_updates = search_space_updates  # if search_space_updates is not None else get_search_updates(categorical_indicator)
+        # if search_space_updates is not None else get_search_updates(categorical_indicator)
+        self.search_space_updates = search_space_updates
         if search_space_updates is not None:
             if not isinstance(self.search_space_updates,
                               HyperparameterSearchSpaceUpdates):
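
For context, a minimal sketch of how a typed get_search_updates could build its return value, assuming the HyperparameterSearchSpaceUpdates API from autoPyTorch.utils.hyperparameter_search_space_update; the node and hyperparameter names below are illustrative, not taken from this commit:

from typing import List

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates


def get_search_updates(categorical_indicator: List[bool]) -> HyperparameterSearchSpaceUpdates:
    # collect updates to apply on top of the default search space;
    # node/hyperparameter names here are only examples
    search_space_updates = HyperparameterSearchSpaceUpdates()
    search_space_updates.append(
        node_name='data_loader',
        hyperparameter='batch_size',
        value_range=[32, 256],
        default_value=128,
    )
    return search_space_updates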

autoPyTorch/data/tabular_feature_validator.py

Lines changed: 0 additions & 2 deletions
@@ -272,8 +272,6 @@ def transform(
             X = self.numpy_to_pandas(X)

         if ispandas(X) and not issparse(X):
-            X = cast(pd.DataFrame, X)
-
             if self.all_nan_columns is None:
                 raise ValueError('_fit must be called before calling transform')

autoPyTorch/evaluation/train_evaluator.py

Lines changed: 24 additions & 19 deletions
@@ -1,6 +1,6 @@
 import json
-from multiprocessing.queues import Queue
 import os
+from multiprocessing.queues import Queue
 from typing import Any, Dict, List, Optional, Tuple, Union

 from ConfigSpace.configuration_space import Configuration
@@ -22,6 +22,7 @@
     fit_and_suppress_warnings
 )
 from autoPyTorch.evaluation.utils import DisableFileOutputParameters
+from autoPyTorch.pipeline.base_pipeline import BasePipeline
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
 from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
 from autoPyTorch.utils.common import dict_repr, subsampler
@@ -195,24 +196,7 @@ def fit_predict_and_loss(self) -> None:
             additional_run_info = pipeline.get_additional_run_info() if hasattr(
                 pipeline, 'get_additional_run_info') else {}

-            # # add learning curve of configurations to additional_run_info
-            # if isinstance(pipeline, TabularClassificationPipeline):
-            #     if hasattr(pipeline.named_steps['trainer'], 'run_summary'):
-            #         run_summary = pipeline.named_steps['trainer'].run_summary
-            #         split_types = ['train', 'val', 'test']
-            #         run_summary_dict = dict(
-            #             run_summary={},
-            #             budget=self.budget,
-            #             seed=self.seed,
-            #             config_id=self.configuration.config_id,
-            #             num_run=self.num_run
-            #         )
-            #         for split_type in split_types:
-            #             run_summary_dict['run_summary'][f'{split_type}_loss'] = run_summary.performance_tracker.get(f'{split_type}_loss', None)
-            #             run_summary_dict['run_summary'][f'{split_type}_metrics'] = run_summary.performance_tracker.get(f'{split_type}_metrics', None)
-            #         self.logger.debug(f"run_summary_dict {json.dumps(run_summary_dict)}")
-            #         with open(os.path.join(self.backend.temporary_directory, 'run_summary.txt'), 'a') as file:
-            #             file.write(f"{json.dumps(run_summary_dict)}\n")
+            # self._write_run_summary(pipeline)

             status = StatusType.SUCCESS

@@ -370,6 +354,27 @@ def fit_predict_and_loss(self) -> None:
                 status=status,
             )

+    def _write_run_summary(self, pipeline: BasePipeline) -> None:
+        # add learning curve of configurations to additional_run_info
+        if isinstance(pipeline, TabularClassificationPipeline):
+            assert isinstance(self.configuration, Configuration)
+            if hasattr(pipeline.named_steps['trainer'], 'run_summary'):
+                run_summary = pipeline.named_steps['trainer'].run_summary
+                split_types = ['train', 'val', 'test']
+                run_summary_dict = dict(
+                    run_summary={},
+                    budget=self.budget,
+                    seed=self.seed,
+                    config_id=self.configuration.config_id,
+                    num_run=self.num_run)
+                for split_type in split_types:
+                    run_summary_dict['run_summary'][f'{split_type}_loss'] = run_summary.performance_tracker.get(
+                        f'{split_type}_loss', None)
+                    run_summary_dict['run_summary'][f'{split_type}_metrics'] = run_summary.performance_tracker.get(
+                        f'{split_type}_metrics', None)
+                with open(os.path.join(self.backend.temporary_directory, 'run_summary.txt'), 'a') as file:
+                    file.write(f"{json.dumps(run_summary_dict)}\n")
+
     def _fit_and_predict(self, pipeline: BaseEstimator, fold: int, train_indices: Union[np.ndarray, List],
                          test_indices: Union[np.ndarray, List],
                          add_pipeline_to_self: bool
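
A self-contained sketch of the JSON-lines pattern _write_run_summary uses: one dict per evaluated configuration, appended to run_summary.txt so the file can be replayed later. Paths and values here are illustrative only:

import json
import os
import tempfile

temporary_directory = tempfile.mkdtemp()

# one summary per configuration, serialized as a single JSON line
run_summary_dict = dict(
    run_summary={'train_loss': 0.21, 'val_loss': 0.25},
    budget=5.0,
    seed=1,
    config_id=3,
    num_run=7,
)
with open(os.path.join(temporary_directory, 'run_summary.txt'), 'a') as file:
    file.write(f"{json.dumps(run_summary_dict)}\n")

# each line is an independent JSON document, so reading back is line-by-line
with open(os.path.join(temporary_directory, 'run_summary.txt')) as file:
    summaries = [json.loads(line) for line in file]
print(summaries)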

autoPyTorch/pipeline/base_pipeline.py

Lines changed: 3 additions & 4 deletions
@@ -1,4 +1,3 @@
-from copy import copy
 import warnings
 from abc import ABCMeta
 from collections import Counter
@@ -297,7 +296,7 @@ def _get_hyperparameter_search_space(self,
         """
         raise NotImplementedError()

-    def _add_forbidden_conditions(self, cs):
+    def _add_forbidden_conditions(self, cs: ConfigurationSpace) -> ConfigurationSpace:
         """
         Add forbidden conditions to ensure valid configurations.
         Currently, Learned Entity Embedding is only valid when encoder is one hot encoder
@@ -320,7 +319,8 @@ def _add_forbidden_conditions(self, cs):
         if cyclic_lr_name in available_schedulers:
             # disable snapshot ensembles and stochastic weight averaging
             snapshot_ensemble_hyperparameter = cs.get_hyperparameter(f'trainer:{trainer}:use_snapshot_ensemble')
-            if hasattr(snapshot_ensemble_hyperparameter, 'choices') and True in snapshot_ensemble_hyperparameter.choices:
+            if hasattr(snapshot_ensemble_hyperparameter, 'choices') and \
+                    True in snapshot_ensemble_hyperparameter.choices:
                 cs.add_forbidden_clause(ForbiddenAndConjunction(
                     ForbiddenEqualsClause(snapshot_ensemble_hyperparameter, True),
                     ForbiddenEqualsClause(cs.get_hyperparameter('lr_scheduler:__choice__'), cyclic_lr_name)
@@ -522,7 +522,6 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
                                 node_hyperparameters,
                                 update.hyperparameter))

-
     def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]]
                             ) -> List[Tuple[str, PipelineStepType]]:
         """

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/column_splitting/ColumnSplitter.py

Lines changed: 13 additions & 11 deletions
@@ -11,7 +11,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import \
     autoPyTorchTabularPreprocessingComponent
-from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter, ispandas
+from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter


 class ColumnSplitter(autoPyTorchTabularPreprocessingComponent):
@@ -24,8 +24,9 @@ def __init__(
         random_state: Optional[np.random.RandomState] = None
     ):
         self.min_categories_for_embedding = min_categories_for_embedding
+        self.random_state = random_state

-        self.special_feature_types = dict(encode_columns=[], embed_columns=[])
+        self.special_feature_types: Dict[str, List] = dict(encode_columns=[], embed_columns=[])
         self.num_categories_per_col: Optional[List] = None
         super().__init__()

@@ -35,15 +36,16 @@ def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> 'ColumnSplitter':

         if len(X['dataset_properties']['categorical_columns']) > 0:
             self.num_categories_per_col = []
-            for categories_per_column, column in zip(X['dataset_properties']['num_categories_per_col'], X['dataset_properties']['categorical_columns']):
-                if (
-                    categories_per_column >= self.min_categories_for_embedding
-                ):
-                    self.special_feature_types['embed_columns'].append(column)
-                    # we only care about the categories for columns to be embedded
-                    self.num_categories_per_col.append(categories_per_column)
-                else:
-                    self.special_feature_types['encode_columns'].append(column)
+            for categories_per_column, column in zip(X['dataset_properties']['num_categories_per_col'],
+                                                     X['dataset_properties']['categorical_columns']):
+                if (
+                    categories_per_column >= self.min_categories_for_embedding
+                ):
+                    self.special_feature_types['embed_columns'].append(column)
+                    # we only care about the categories for columns to be embedded
+                    self.num_categories_per_col.append(categories_per_column)
+                else:
+                    self.special_feature_types['encode_columns'].append(column)

         return self
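
A standalone sketch of the splitting rule above: columns whose cardinality reaches min_categories_for_embedding go to the embedding path, the rest to the encoding path. Function name, threshold, and example data are illustrative:

from typing import Dict, List, Tuple


def split_columns(num_categories_per_col: List[int],
                  categorical_columns: List[str],
                  min_categories_for_embedding: int = 5
                  ) -> Tuple[Dict[str, List[str]], List[int]]:
    special_feature_types: Dict[str, List[str]] = dict(encode_columns=[], embed_columns=[])
    embed_cardinalities: List[int] = []
    for n_categories, column in zip(num_categories_per_col, categorical_columns):
        if n_categories >= min_categories_for_embedding:
            special_feature_types['embed_columns'].append(column)
            # cardinalities are only kept for columns that will be embedded
            embed_cardinalities.append(n_categories)
        else:
            special_feature_types['encode_columns'].append(column)
    return special_feature_types, embed_cardinalities


# a 12-category 'city' column is embedded; a 2-category 'sex' column is encoded
print(split_columns([12, 2], ['city', 'sex']))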

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py

Lines changed: 1 addition & 2 deletions
@@ -13,8 +13,7 @@ class BaseEncoder(autoPyTorchTabularPreprocessingComponent):
     def __init__(self) -> None:
         super().__init__()
         self.add_fit_requirements([
-            FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True),
-        ])
+            FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True), ])

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         """

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/utils.py

Lines changed: 7 additions & 6 deletions
@@ -1,6 +1,6 @@
 import warnings
 from math import ceil, floor
-from typing import Dict, List, Optional, Sequence
+from typing import Dict, List, Optional, Sequence, Tuple

 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.utils.common import HyperparameterSearchSpace, HyperparameterValueType
@@ -82,11 +82,12 @@ def percentage_value_range_to_integer_range(
     else:
         log = hyperparameter_search_space.log

-    value_range = (
-        floor(float(hyperparameter_search_space.value_range[0]) * n_features),
-        floor(float(hyperparameter_search_space.value_range[-1]) * n_features)) \
-        if len(hyperparameter_search_space.value_range) == 2 else \
-        (floor(float(hyperparameter_search_space.value_range[0]) * n_features),)
+    value_range: Tuple
+    if len(hyperparameter_search_space.value_range) == 2:
+        value_range = (floor(float(hyperparameter_search_space.value_range[0]) * n_features),
+                       floor(float(hyperparameter_search_space.value_range[-1]) * n_features))
+    else:
+        value_range = (floor(float(hyperparameter_search_space.value_range[0]) * n_features),)

     hyperparameter_search_space = HyperparameterSearchSpace(
         hyperparameter=hyperparameter_name,
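
A standalone sketch of the conversion performed above: fractional bounds expressed as a percentage of the feature count are scaled by n_features and floored into an integer range. The function name and example values are illustrative:

from math import floor
from typing import Sequence, Tuple


def percentage_to_integer_range(value_range: Sequence[float],
                                n_features: int) -> Tuple[int, ...]:
    # two bounds -> integer (low, high); one bound -> a single-element range
    if len(value_range) == 2:
        return (floor(float(value_range[0]) * n_features),
                floor(float(value_range[-1]) * n_features))
    return (floor(float(value_range[0]) * n_features),)


print(percentage_to_integer_range((0.1, 0.5), 40))  # -> (4, 20)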

autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py

Lines changed: 3 additions & 3 deletions
@@ -41,9 +41,9 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:

         # We need to also save the preprocess transforms for inference
         X.update({
-                'preprocess_transforms': transforms,
-                'shape_after_preprocessing': X['X_train'].shape[1:]
-            })
+            'preprocess_transforms': transforms,
+            'shape_after_preprocessing': X['X_train'].shape[1:]
+        })
         return X

     @staticmethod

autoPyTorch/pipeline/components/setup/network_embedding/LearnedEntityEmbedding.py

Lines changed: 3 additions & 3 deletions
@@ -4,7 +4,6 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     UniformFloatHyperparameter,
-    UniformIntegerHyperparameter
 )

 import numpy as np
@@ -107,8 +106,9 @@ def get_hyperparameter_search_space(
             # instead passing it as a parameter to the feature validator, which
             # allows us to pass embed_columns to the dataset properties.
             # TODO: test the trade off
-            # Another solution is to combine `OneHotEncoding`, `Embedding` and `NoEncoding` in one custom transformer.
-            # this will also allow users to use this transformer outside the pipeline
+            # Another solution is to combine `OneHotEncoding`, `Embedding` and `NoEncoding`
+            # in one custom transformer. this will also allow users to use this transformer
+            # outside the pipeline
             ee_dimensions_search_space = HyperparameterSearchSpace(hyperparameter="dimension_reduction_" + str(i),
                                                                    value_range=dimension_reduction.value_range,
                                                                    default_value=dimension_reduction.default_value,

autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@ def __init__(self, random_state: Optional[np.random.RandomState] = None):
         super().__init__(random_state=random_state)
         self.add_fit_requirements([
             FitRequirement('num_categories_per_col', (List,), user_defined=True, dataset_property=True),
-            FitRequirement('shape_after_preprocessing', (Tuple), user_defined=False, dataset_property=False)])
+            FitRequirement('shape_after_preprocessing', (Tuple[int],), user_defined=False, dataset_property=False)])

         self.embedding: Optional[nn.Module] = None

@@ -55,7 +55,7 @@ def _get_required_info_from_data(self, X: Dict[str, Any]) -> Tuple[int, np.ndarr
         num_cols = X['shape_after_preprocessing']
         # only works for 2D(rows, features) tabular data
         num_features_excl_embed = num_cols[0] - len(X['embed_columns'])
-        
+
         num_categories_per_col = np.zeros(num_cols, dtype=np.int16)

         categories_per_embed_col = X['dataset_properties']['num_categories_per_col']
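
A rough sketch of what _get_required_info_from_data computes, with illustrative values; it assumes (an assumption, not stated in this hunk) that embedded columns are ordered after all other columns, so their cardinalities fill the tail of the array:

import numpy as np

shape_after_preprocessing = (10,)       # feature count after preprocessing (illustrative)
embed_columns = [7, 8, 9]               # columns selected for embedding (illustrative)
categories_per_embed_col = [12, 5, 31]  # their cardinalities (illustrative)

# features that are not embedded keep a category count of zero
num_features_excl_embed = shape_after_preprocessing[0] - len(embed_columns)
num_categories_per_col = np.zeros(shape_after_preprocessing, dtype=np.int16)
num_categories_per_col[num_features_excl_embed:] = categories_per_embed_col

print(num_features_excl_embed, num_categories_per_col)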
