
Commit 9573358

fix precommit and add test changes
1 parent adc26d5 commit 9573358

File tree: 20 files changed, +94 −155 lines

autoPyTorch/api/base_task.py

Lines changed: 5 additions & 4 deletions
@@ -111,7 +111,7 @@ def send_warnings_to_log(
     return prediction


-def get_search_updates(categorical_indicator: List[bool]):
+def get_search_updates(categorical_indicator: List[bool]) -> HyperparameterSearchSpaceUpdates:
     """
     These updates mimic the autopytorch tabular paper.
     Returns:
@@ -120,8 +120,8 @@ def get_search_updates(categorical_indicator: List[bool]):
         The search space updates like setting different hps to different values or ranges.
     """

-    has_cat_features = any(categorical_indicator)
-    has_numerical_features = not all(categorical_indicator)
+    # has_cat_features = any(categorical_indicator)
+    # has_numerical_features = not all(categorical_indicator)

     search_space_updates = HyperparameterSearchSpaceUpdates()

@@ -267,7 +267,8 @@ def __init__(
         self.input_validator: Optional[BaseInputValidator] = None

-        self.search_space_updates = search_space_updates  # if search_space_updates is not None else get_search_updates(categorical_indicator)
+        # if search_space_updates is not None else get_search_updates(categorical_indicator)
+        self.search_space_updates = search_space_updates
         if search_space_updates is not None:
             if not isinstance(self.search_space_updates,
                               HyperparameterSearchSpaceUpdates):
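
For context, a minimal sketch of how a typed get_search_updates could build its return value, assuming the HyperparameterSearchSpaceUpdates API from autoPyTorch.utils.hyperparameter_search_space_update; the node and hyperparameter names below are illustrative, not taken from this commit:

from typing import List

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates


def get_search_updates(categorical_indicator: List[bool]) -> HyperparameterSearchSpaceUpdates:
    # collect updates to apply on top of the default search space;
    # node/hyperparameter names here are only examples
    search_space_updates = HyperparameterSearchSpaceUpdates()
    search_space_updates.append(
        node_name='data_loader',
        hyperparameter='batch_size',
        value_range=[32, 256],
        default_value=128,
    )
    return search_space_updates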

autoPyTorch/data/tabular_feature_validator.py

Lines changed: 0 additions & 2 deletions
@@ -272,8 +272,6 @@ def transform(
             X = self.numpy_to_pandas(X)

         if ispandas(X) and not issparse(X):
-            X = cast(pd.DataFrame, X)
-
             if self.all_nan_columns is None:
                 raise ValueError('_fit must be called before calling transform')

autoPyTorch/evaluation/train_evaluator.py

Lines changed: 24 additions & 19 deletions
@@ -1,6 +1,6 @@
 import json
-from multiprocessing.queues import Queue
 import os
+from multiprocessing.queues import Queue
 from typing import Any, Dict, List, Optional, Tuple, Union

 from ConfigSpace.configuration_space import Configuration
@@ -22,6 +22,7 @@
     fit_and_suppress_warnings
 )
 from autoPyTorch.evaluation.utils import DisableFileOutputParameters
+from autoPyTorch.pipeline.base_pipeline import BasePipeline
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
 from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
 from autoPyTorch.utils.common import dict_repr, subsampler
@@ -195,24 +196,7 @@ def fit_predict_and_loss(self) -> None:
             additional_run_info = pipeline.get_additional_run_info() if hasattr(
                 pipeline, 'get_additional_run_info') else {}

-            # # add learning curve of configurations to additional_run_info
-            # if isinstance(pipeline, TabularClassificationPipeline):
-            #     if hasattr(pipeline.named_steps['trainer'], 'run_summary'):
-            #         run_summary = pipeline.named_steps['trainer'].run_summary
-            #         split_types = ['train', 'val', 'test']
-            #         run_summary_dict = dict(
-            #             run_summary={},
-            #             budget=self.budget,
-            #             seed=self.seed,
-            #             config_id=self.configuration.config_id,
-            #             num_run=self.num_run
-            #         )
-            #         for split_type in split_types:
-            #             run_summary_dict['run_summary'][f'{split_type}_loss'] = run_summary.performance_tracker.get(f'{split_type}_loss', None)
-            #             run_summary_dict['run_summary'][f'{split_type}_metrics'] = run_summary.performance_tracker.get(f'{split_type}_metrics', None)
-            #         self.logger.debug(f"run_summary_dict {json.dumps(run_summary_dict)}")
-            #         with open(os.path.join(self.backend.temporary_directory, 'run_summary.txt'), 'a') as file:
-            #             file.write(f"{json.dumps(run_summary_dict)}\n")
+            # self._write_run_summary(pipeline)

             status = StatusType.SUCCESS

@@ -370,6 +354,27 @@ def fit_predict_and_loss(self) -> None:
                 status=status,
             )

+    def _write_run_summary(self, pipeline: BasePipeline) -> None:
+        # add learning curve of configurations to additional_run_info
+        if isinstance(pipeline, TabularClassificationPipeline):
+            assert isinstance(self.configuration, Configuration)
+            if hasattr(pipeline.named_steps['trainer'], 'run_summary'):
+                run_summary = pipeline.named_steps['trainer'].run_summary
+                split_types = ['train', 'val', 'test']
+                run_summary_dict = dict(
+                    run_summary={},
+                    budget=self.budget,
+                    seed=self.seed,
+                    config_id=self.configuration.config_id,
+                    num_run=self.num_run)
+                for split_type in split_types:
+                    run_summary_dict['run_summary'][f'{split_type}_loss'] = run_summary.performance_tracker.get(
+                        f'{split_type}_loss', None)
+                    run_summary_dict['run_summary'][f'{split_type}_metrics'] = run_summary.performance_tracker.get(
+                        f'{split_type}_metrics', None)
+                with open(os.path.join(self.backend.temporary_directory, 'run_summary.txt'), 'a') as file:
+                    file.write(f"{json.dumps(run_summary_dict)}\n")
+
     def _fit_and_predict(self, pipeline: BaseEstimator, fold: int, train_indices: Union[np.ndarray, List],
                          test_indices: Union[np.ndarray, List],
                          add_pipeline_to_self: bool
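
A self-contained sketch of the JSON-lines pattern _write_run_summary uses: one dict per evaluated configuration, appended to run_summary.txt so the file can be replayed later. Paths and values here are illustrative only:

import json
import os
import tempfile

temporary_directory = tempfile.mkdtemp()

# one summary per configuration, serialized as a single JSON line
run_summary_dict = dict(
    run_summary={'train_loss': 0.21, 'val_loss': 0.25},
    budget=5.0,
    seed=1,
    config_id=3,
    num_run=7,
)
with open(os.path.join(temporary_directory, 'run_summary.txt'), 'a') as file:
    file.write(f"{json.dumps(run_summary_dict)}\n")

# each line is an independent JSON document, so reading back is line-by-line
with open(os.path.join(temporary_directory, 'run_summary.txt')) as file:
    summaries = [json.loads(line) for line in file]
print(summaries)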

autoPyTorch/pipeline/base_pipeline.py

Lines changed: 3 additions & 4 deletions
@@ -1,4 +1,3 @@
-from copy import copy
 import warnings
 from abc import ABCMeta
 from collections import Counter
@@ -297,7 +296,7 @@ def _get_hyperparameter_search_space(self,
         """
         raise NotImplementedError()

-    def _add_forbidden_conditions(self, cs):
+    def _add_forbidden_conditions(self, cs: ConfigurationSpace) -> ConfigurationSpace:
         """
         Add forbidden conditions to ensure valid configurations.
         Currently, Learned Entity Embedding is only valid when encoder is one hot encoder
@@ -320,7 +319,8 @@ def _add_forbidden_conditions(self, cs):
         if cyclic_lr_name in available_schedulers:
             # disable snapshot ensembles and stochastic weight averaging
             snapshot_ensemble_hyperparameter = cs.get_hyperparameter(f'trainer:{trainer}:use_snapshot_ensemble')
-            if hasattr(snapshot_ensemble_hyperparameter, 'choices') and True in snapshot_ensemble_hyperparameter.choices:
+            if hasattr(snapshot_ensemble_hyperparameter, 'choices') and \
+                    True in snapshot_ensemble_hyperparameter.choices:
                 cs.add_forbidden_clause(ForbiddenAndConjunction(
                     ForbiddenEqualsClause(snapshot_ensemble_hyperparameter, True),
                     ForbiddenEqualsClause(cs.get_hyperparameter('lr_scheduler:__choice__'), cyclic_lr_name)
@@ -522,7 +522,6 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
                                 node_hyperparameters,
                                 update.hyperparameter))

-
     def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]]
                             ) -> List[Tuple[str, PipelineStepType]]:
         """

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/column_splitting/ColumnSplitter.py

Lines changed: 13 additions & 11 deletions
@@ -11,7 +11,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import \
     autoPyTorchTabularPreprocessingComponent
-from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter, ispandas
+from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter


 class ColumnSplitter(autoPyTorchTabularPreprocessingComponent):
@@ -24,8 +24,9 @@ def __init__(
         random_state: Optional[np.random.RandomState] = None
     ):
         self.min_categories_for_embedding = min_categories_for_embedding
+        self.random_state = random_state

-        self.special_feature_types = dict(encode_columns=[], embed_columns=[])
+        self.special_feature_types: Dict[str, List] = dict(encode_columns=[], embed_columns=[])
         self.num_categories_per_col: Optional[List] = None
         super().__init__()

@@ -35,15 +36,16 @@ def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> 'ColumnSplitter':

         if len(X['dataset_properties']['categorical_columns']) > 0:
             self.num_categories_per_col = []
-            for categories_per_column, column in zip(X['dataset_properties']['num_categories_per_col'], X['dataset_properties']['categorical_columns']):
-                if (
-                    categories_per_column >= self.min_categories_for_embedding
-                ):
-                    self.special_feature_types['embed_columns'].append(column)
-                    # we only care about the categories for columns to be embedded
-                    self.num_categories_per_col.append(categories_per_column)
-                else:
-                    self.special_feature_types['encode_columns'].append(column)
+            for categories_per_column, column in zip(X['dataset_properties']['num_categories_per_col'],
+                                                     X['dataset_properties']['categorical_columns']):
+                if (
+                    categories_per_column >= self.min_categories_for_embedding
+                ):
+                    self.special_feature_types['embed_columns'].append(column)
+                    # we only care about the categories for columns to be embedded
+                    self.num_categories_per_col.append(categories_per_column)
+                else:
+                    self.special_feature_types['encode_columns'].append(column)

         return self
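
A standalone sketch of the splitting rule above: columns whose cardinality reaches min_categories_for_embedding go to the embedding path, the rest to the encoding path. Function name, threshold, and example data are illustrative:

from typing import Dict, List, Tuple


def split_columns(num_categories_per_col: List[int],
                  categorical_columns: List[str],
                  min_categories_for_embedding: int = 5
                  ) -> Tuple[Dict[str, List[str]], List[int]]:
    special_feature_types: Dict[str, List[str]] = dict(encode_columns=[], embed_columns=[])
    embed_cardinalities: List[int] = []
    for n_categories, column in zip(num_categories_per_col, categorical_columns):
        if n_categories >= min_categories_for_embedding:
            special_feature_types['embed_columns'].append(column)
            # cardinalities are only kept for columns that will be embedded
            embed_cardinalities.append(n_categories)
        else:
            special_feature_types['encode_columns'].append(column)
    return special_feature_types, embed_cardinalities


# a 12-category 'city' column is embedded; a 2-category 'sex' column is encoded
print(split_columns([12, 2], ['city', 'sex']))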

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py

Lines changed: 1 addition & 2 deletions
@@ -13,8 +13,7 @@ class BaseEncoder(autoPyTorchTabularPreprocessingComponent):
     def __init__(self) -> None:
         super().__init__()
         self.add_fit_requirements([
-            FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True),
-        ])
+            FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True), ])

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         """

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/utils.py

Lines changed: 7 additions & 6 deletions
@@ -1,6 +1,6 @@
 import warnings
 from math import ceil, floor
-from typing import Dict, List, Optional, Sequence
+from typing import Dict, List, Optional, Sequence, Tuple

 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.utils.common import HyperparameterSearchSpace, HyperparameterValueType
@@ -82,11 +82,12 @@ def percentage_value_range_to_integer_range(
     else:
         log = hyperparameter_search_space.log

-    value_range = (
-        floor(float(hyperparameter_search_space.value_range[0]) * n_features),
-        floor(float(hyperparameter_search_space.value_range[-1]) * n_features)) \
-        if len(hyperparameter_search_space.value_range) == 2 else \
-        (floor(float(hyperparameter_search_space.value_range[0]) * n_features),)
+    value_range: Tuple
+    if len(hyperparameter_search_space.value_range) == 2:
+        value_range = (floor(float(hyperparameter_search_space.value_range[0]) * n_features),
+                       floor(float(hyperparameter_search_space.value_range[-1]) * n_features))
+    else:
+        value_range = (floor(float(hyperparameter_search_space.value_range[0]) * n_features),)

     hyperparameter_search_space = HyperparameterSearchSpace(
         hyperparameter=hyperparameter_name,
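
A standalone sketch of the conversion performed above: fractional bounds expressed as a percentage of the feature count are scaled by n_features and floored into an integer range. The function name and example values are illustrative:

from math import floor
from typing import Sequence, Tuple


def percentage_to_integer_range(value_range: Sequence[float],
                                n_features: int) -> Tuple[int, ...]:
    # two bounds -> integer (low, high); one bound -> a single-element range
    if len(value_range) == 2:
        return (floor(float(value_range[0]) * n_features),
                floor(float(value_range[-1]) * n_features))
    return (floor(float(value_range[0]) * n_features),)


print(percentage_to_integer_range((0.1, 0.5), 40))  # -> (4, 20)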

autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py

Lines changed: 3 additions & 3 deletions
@@ -41,9 +41,9 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:

         # We need to also save the preprocess transforms for inference
         X.update({
-                'preprocess_transforms': transforms,
-                'shape_after_preprocessing': X['X_train'].shape[1:]
-            })
+            'preprocess_transforms': transforms,
+            'shape_after_preprocessing': X['X_train'].shape[1:]
+        })
         return X

     @staticmethod

autoPyTorch/pipeline/components/setup/network_embedding/LearnedEntityEmbedding.py

Lines changed: 3 additions & 3 deletions
@@ -4,7 +4,6 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     UniformFloatHyperparameter,
-    UniformIntegerHyperparameter
 )

 import numpy as np
@@ -107,8 +106,9 @@ def get_hyperparameter_search_space(
             # instead passing it as a parameter to the feature validator, which
             # allows us to pass embed_columns to the dataset properties.
             # TODO: test the trade off
-            # Another solution is to combine `OneHotEncoding`, `Embedding` and `NoEncoding` in one custom transformer.
-            # this will also allow users to use this transformer outside the pipeline
+            # Another solution is to combine `OneHotEncoding`, `Embedding` and `NoEncoding`
+            # in one custom transformer. this will also allow users to use this transformer
+            # outside the pipeline
             ee_dimensions_search_space = HyperparameterSearchSpace(hyperparameter="dimension_reduction_" + str(i),
                                                                    value_range=dimension_reduction.value_range,
                                                                    default_value=dimension_reduction.default_value,

autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@ def __init__(self, random_state: Optional[np.random.RandomState] = None):
         super().__init__(random_state=random_state)
         self.add_fit_requirements([
             FitRequirement('num_categories_per_col', (List,), user_defined=True, dataset_property=True),
-            FitRequirement('shape_after_preprocessing', (Tuple), user_defined=False, dataset_property=False)])
+            FitRequirement('shape_after_preprocessing', (Tuple[int],), user_defined=False, dataset_property=False)])

         self.embedding: Optional[nn.Module] = None

@@ -55,7 +55,7 @@ def _get_required_info_from_data(self, X: Dict[str, Any]) -> Tuple[int, np.ndarr
         num_cols = X['shape_after_preprocessing']
         # only works for 2D(rows, features) tabular data
         num_features_excl_embed = num_cols[0] - len(X['embed_columns'])
-        
+
         num_categories_per_col = np.zeros(num_cols, dtype=np.int16)

         categories_per_embed_col = X['dataset_properties']['num_categories_per_col']
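
A rough sketch of what _get_required_info_from_data computes, with illustrative values; it assumes (an assumption, not stated in this hunk) that embedded columns are ordered after all other columns, so their cardinalities fill the tail of the array:

import numpy as np

shape_after_preprocessing = (10,)       # feature count after preprocessing (illustrative)
embed_columns = [7, 8, 9]               # columns selected for embedding (illustrative)
categories_per_embed_col = [12, 5, 31]  # their cardinalities (illustrative)

# features that are not embedded keep a category count of zero
num_features_excl_embed = shape_after_preprocessing[0] - len(embed_columns)
num_categories_per_col = np.zeros(shape_after_preprocessing, dtype=np.int16)
num_categories_per_col[num_features_excl_embed:] = categories_per_embed_col

print(num_features_excl_embed, num_categories_per_col)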
