Skip to content

Commit b94a1cc

Browse files
committed
ODSC-45790: Improve error reporting. Improve get_feature_df output. Handle manual featuregroup association with dataset to support complex queries
1 parent 9ba1f32 commit b94a1cc

File tree

3 files changed

+57
-4
lines changed

3 files changed

+57
-4
lines changed

ads/feature_store/dataset.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
from great_expectations.core import ExpectationSuite
1111

1212
from ads import deprecated
13+
from oci.feature_store.models import (
14+
DatasetFeatureGroupCollection,
15+
DatasetFeatureGroupSummary,
16+
)
17+
1318
from ads.common import utils
1419
from ads.common.oci_mixin import OCIModelMixin
1520
from ads.feature_store.common.enums import (
@@ -29,6 +34,7 @@
2934
OciExecutionStrategyProvider,
3035
)
3136
from ads.feature_store.feature import DatasetFeature
37+
from ads.feature_store.feature_group import FeatureGroup
3238
from ads.feature_store.feature_group_expectation import Expectation
3339
from ads.feature_store.feature_option_details import FeatureOptionDetails
3440
from ads.feature_store.service.oci_dataset import OCIDataset
@@ -116,6 +122,7 @@ class Dataset(Builder):
116122
CONST_ITEMS = "items"
117123
CONST_LAST_JOB_ID = "jobId"
118124
CONST_MODEL_DETAILS = "modelDetails"
125+
CONST_FEATURE_GROUP = "datasetFeatureGroups"
119126

120127
attribute_map = {
121128
CONST_ID: "id",
@@ -133,6 +140,7 @@ class Dataset(Builder):
133140
CONST_LIFECYCLE_STATE: "lifecycle_state",
134141
CONST_MODEL_DETAILS: "model_details",
135142
CONST_PARTITION_KEYS: "partition_keys",
143+
CONST_FEATURE_GROUP: "dataset_feature_groups",
136144
}
137145

138146
def __init__(self, spec: Dict = None, **kwargs) -> None:
@@ -530,6 +538,44 @@ def with_model_details(self, model_details: ModelDetails) -> "Dataset":
530538

531539
return self.set_spec(self.CONST_MODEL_DETAILS, model_details.to_dict())
532540

541+
@property
542+
def feature_groups(self) -> List["FeatureGroup"]:
543+
collection: "DatasetFeatureGroupCollection" = self.get_spec(
544+
self.CONST_FEATURE_GROUP
545+
)
546+
feature_groups: List["FeatureGroup"] = []
547+
if collection:
548+
for datasetFGSummary in collection.items:
549+
feature_groups.append(
550+
FeatureGroup.from_id(datasetFGSummary.feature_group_id)
551+
)
552+
553+
return feature_groups
554+
555+
@feature_groups.setter
556+
def feature_groups(self, feature_groups: List["FeatureGroup"]):
557+
self.with_feature_groups(feature_groups)
558+
559+
def with_feature_groups(self, feature_groups: List["FeatureGroup"]) -> "Dataset":
560+
"""Sets the feature groups for the dataset.
561+
562+
Parameters
563+
----------
564+
feature_groups: List of feature groups
565+
Returns
566+
-------
567+
Dataset
568+
The Dataset instance (self).
569+
570+
"""
571+
collection: List["DatasetFeatureGroupSummary"] = []
572+
for group in feature_groups:
573+
collection.append(DatasetFeatureGroupSummary(feature_group_id=group.id))
574+
575+
return self.set_spec(
576+
self.CONST_FEATURE_GROUP, DatasetFeatureGroupCollection(items=collection)
577+
)
578+
533579
@property
534580
def partition_keys(self) -> List[str]:
535581
return self.get_spec(self.CONST_PARTITION_KEYS)

ads/feature_store/execution_strategy/spark/spark_execution.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
show_validation_summary,
1717
)
1818
from ads.feature_store.execution_strategy.engine.spark_engine import SparkEngine
19+
import traceback
1920

2021
try:
2122
from pyspark.sql import DataFrame
@@ -291,8 +292,9 @@ def _save_offline_dataframe(
291292

292293
except Exception as ex:
293294
error_details = str(ex)
295+
tb = traceback.format_exc()
294296
logger.error(
295-
f"FeatureGroup Materialization Failed with : {type(ex)} with error message: {ex}"
297+
f"FeatureGroup Materialization Failed with : {type(ex)} with error message: {ex} and stacktrace {tb}",
296298
)
297299

298300
show_ingestion_summary(
@@ -427,8 +429,9 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):
427429

428430
except Exception as ex:
429431
error_details = str(ex)
432+
tb = traceback.format_exc()
430433
logger.error(
431-
f"Dataset Materialization Failed with : {type(ex)} with error message: {ex}"
434+
f"Dataset Materialization Failed with : {type(ex)} with error message: {ex} and stacktrace {tb}"
432435
)
433436

434437
show_ingestion_summary(

ads/feature_store/feature_group.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -750,8 +750,12 @@ def get_features_df(self) -> "pd.DataFrame":
750750
"""
751751
records = []
752752
for feature in self.features:
753-
records.append({"name": feature.feature_name, "type": feature.feature_type})
754-
753+
records.append(
754+
{
755+
"name": feature.feature_name,
756+
"type": feature.feature_type,
757+
}
758+
)
755759
return pd.DataFrame.from_records(records)
756760

757761
def get_input_features_df(self) -> "pd.DataFrame":

0 commit comments

Comments
 (0)