Skip to content

Commit c22d7a6

Browse files
committed
Merge remote-tracking branch 'origin/feature/feature-store-marketplace-operator' into feature/feature-store-marketplace-operator
2 parents f624981 + 72e753c commit c22d7a6

File tree

7 files changed

+71
-16
lines changed

7 files changed

+71
-16
lines changed

ads/feature_store/common/utils/transformation_utils.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ def apply_transformation(
5252
# Execute the function under namespace
5353
execution_namespace = {}
5454
exec(transformation_function, execution_namespace)
55-
transformation_function_caller = execution_namespace.get(
56-
transformation.name
57-
)
55+
transformation_function_caller = execution_namespace.get(transformation.name)
5856
transformed_data = None
5957

6058
transformation_kwargs_dict = json.loads(transformation_kwargs)

ads/feature_store/docs/source/dataset.rst

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ The following example defines a dataset and gives it a name. A ``Dataset`` insta
2727
.with_feature_store_id("<feature_store_id>")
2828
.with_description("<dataset_description>")
2929
.with_compartment_id("<compartment_id>")
30-
.with_dataset_ingestion_mode(DatasetIngestionMode.SQL)
3130
.with_query('SELECT col FROM <entity_id>.<feature_group_name>')
3231
)
3332

@@ -56,12 +55,50 @@ Use the the ``create()`` method of the ``Dataset`` instance to create a dataset.
5655

5756
.. important::
5857

59-
This method doesn’t persist any metadata or feature data in the Feature Store. To persist the dataset and save feature data including the metadata in the Feature Store, use the ``materialise()`` method with a dataframe.
58+
This method does not persist any metadata or feature data in the Feature Store. To persist the dataset and save feature data including the metadata in the Feature Store, use the ``materialise()`` method with a dataframe. For simple queries with only one level of nesting, users do not need to define ``with_feature_groups``. However, in complex queries involving more than one level of nesting, users are required to define ``with_feature_groups``.
6059

61-
.. code-block:: python3
6260

63-
# Create an dataset
64-
dataset.create()
61+
.. tabs::
62+
63+
.. code-tab:: Python3
64+
:caption: Simple SQL
65+
66+
from ads.feature_store.dataset import Dataset
67+
68+
dataset = (
69+
Dataset
70+
.with_name("<dataset_name>")
71+
.with_entity_id(<entity_id>)
72+
.with_feature_store_id("<feature_store_id>")
73+
.with_description("<dataset_description>")
74+
.with_compartment_id("<compartment_id>")
75+
.with_query('SELECT col FROM <entity_id>.<feature_group_name>')
76+
)
77+
78+
dataset.create()
79+
80+
81+
.. code-tab:: Python3
82+
:caption: Complex SQL
83+
84+
from ads.feature_store.dataset import Dataset
85+
from ads.feature_store.feature_group import FeatureGroup
86+
87+
feature_group = FeatureGroup.from_id("<unique_id>")
88+
89+
dataset = (
90+
Dataset
91+
.with_name("<dataset_name>")
92+
.with_entity_id(<entity_id>)
93+
.with_feature_store_id("<feature_store_id>")
94+
.with_description("<dataset_description>")
95+
.with_compartment_id("<compartment_id>")
96+
.with_query("SELECT col FROM (SELECT col FROM <entity_id>.<feature_group_name> WHERE condition = 'some_condition') AS nested_table;")
97+
.with_feature_groups([feature_group])
98+
)
99+
100+
# Create a dataset
101+
dataset.create()
65102

66103

67104
Load

ads/feature_store/docs/source/feature_group.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ In feature group, three key types play crucial roles:
1111
- **Partition Keys**: These keys assist in distributing and organizing data across different partitions in the feature group. They enable efficient data retrieval by logically grouping related information, optimizing query performance, and minimizing resource utilization.
1212
- **Event Timestamp Keys**: These keys capture the temporal aspect of the data by indicating the time or timestamp associated with a specific event or entry in the feature group. They facilitate time-based querying, trend analysis, and time-series operations.
1313

14+
.. important::
15+
By default, special characters such as spaces and any of the characters ,;{}()\n\t= are not supported in feature group column names.
16+
1417
Define
1518
======
1619

@@ -353,6 +356,17 @@ Feature store provides an API similar to Pandas to join feature groups together,
353356
# Filter feature group
354357
feature_group.filter(feature_group.col1 > 10).show()
355358
359+
# Filter feature group with get_feature
360+
feature_group.filter(feature_group.get_feature("col1") > 10).show()
361+
362+
363+
Get Feature
364+
===========
365+
Use the ``get_feature()`` method of the ``FeatureGroup`` instance to return the description of a feature.
366+
367+
.. code-block:: python3
368+
369+
feature_group.get_feature("col1")
356370
357371
Preview
358372
=======

ads/feature_store/docs/source/transformation.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Transformations in a Feature Store refer to the operations and processes applied
1414
.. code-tab:: Python3
1515
:caption: TransformationMode.SQL
1616

17+
from ads.feature_store.transformation import Transformation, TransformationMode
18+
1719
def transactions_df(transactions_batch):
1820
sql_query = f"select id, cc_num, amount from {transactions_batch}"
1921
return sql_query

ads/feature_store/execution_strategy/spark/spark_execution.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,10 @@ def update_feature_definition_features(self, feature_group, target_table):
366366
try:
367367
# Get the output features
368368
output_features = get_features(
369-
self.spark_engine.get_output_columns_from_table_or_dataframe(table_name=target_table), feature_group.id
369+
self.spark_engine.get_output_columns_from_table_or_dataframe(
370+
table_name=target_table
371+
),
372+
feature_group.id,
370373
)
371374
if output_features:
372375
feature_group._with_features(output_features)
@@ -392,7 +395,9 @@ def update_dataset_features(self, dataset, target_table):
392395
try:
393396
# Get the output features
394397
output_features = get_features(
395-
output_columns=self.spark_engine.get_output_columns_from_table_or_dataframe(table_name=target_table),
398+
output_columns=self.spark_engine.get_output_columns_from_table_or_dataframe(
399+
table_name=target_table
400+
),
396401
parent_id=dataset.id,
397402
entity_type=EntityType.DATASET,
398403
)

ads/feature_store/feature_store.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -367,9 +367,7 @@ def create_entity(
367367
"FeatureStore Resource must be created or saved before creating the entity."
368368
)
369369

370-
self.oci_fs_entity = self._build_entity(
371-
name, description, compartment_id
372-
)
370+
self.oci_fs_entity = self._build_entity(name, description, compartment_id)
373371
return self.oci_fs_entity.create()
374372

375373
def delete_entity(self):

ads/feature_store/transformation.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,13 +346,14 @@ def create(self, **kwargs) -> "Transformation":
346346
if not self.source_code_function:
347347
raise ValueError("Transformation source code function must be provided.")
348348

349+
if not self.transformation_mode:
350+
raise ValueError("Transformation Mode must be provided.")
351+
349352
if not self.name:
350353
self.name = self._transformation_function_name
351354

352355
if self.name != self._transformation_function_name:
353-
raise ValueError(
354-
"Transformation name and function name must be same."
355-
)
356+
raise ValueError("Transformation name and function name must be same.")
356357

357358
payload = deepcopy(self._spec)
358359
payload.pop("id", None)

0 commit comments

Comments
 (0)