Skip to content

Commit c22d7a6

Browse files
committed
Merge remote-tracking branch 'origin/feature/feature-store-marketplace-operator' into feature/feature-store-marketplace-operator
2 parents f624981 + 72e753c commit c22d7a6

File tree

7 files changed

+71
-16
lines changed

7 files changed

+71
-16
lines changed

ads/feature_store/common/utils/transformation_utils.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ def apply_transformation(
5252
# Execute the function under namespace
5353
execution_namespace = {}
5454
exec(transformation_function, execution_namespace)
55-
transformation_function_caller = execution_namespace.get(
56-
transformation.name
57-
)
55+
transformation_function_caller = execution_namespace.get(transformation.name)
5856
transformed_data = None
5957

6058
transformation_kwargs_dict = json.loads(transformation_kwargs)

ads/feature_store/docs/source/dataset.rst

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ The following example defines a dataset and gives it a name. A ``Dataset`` insta
2727
.with_feature_store_id("<feature_store_id>")
2828
.with_description("<dataset_description>")
2929
.with_compartment_id("<compartment_id>")
30-
.with_dataset_ingestion_mode(DatasetIngestionMode.SQL)
3130
.with_query('SELECT col FROM <entity_id>.<feature_group_name>')
3231
)
3332

@@ -56,12 +55,50 @@ Use the the ``create()`` method of the ``Dataset`` instance to create a dataset.
5655

5756
.. important::
5857

59-
This method doesn’t persist any metadata or feature data in the Feature Store. To persist the dataset and save feature data including the metadata in the Feature Store, use the ``materialise()`` method with a dataframe.
58+
This method does not persist any metadata or feature data in the Feature Store. To persist the dataset and save feature data including the metadata in the Feature Store, use the ``materialise()`` method with a dataframe. For simple queries with only one level of nesting, users do not need to define ``with_feature_groups``. However, in complex queries involving more than one level of nesting, users are required to define ``with_feature_groups``.
6059

61-
.. code-block:: python3
6260

63-
# Create an dataset
64-
dataset.create()
61+
.. tabs::
62+
63+
.. code-tab:: Python3
64+
:caption: Simple SQL
65+
66+
from ads.feature_store.dataset import Dataset
67+
68+
dataset = (
69+
Dataset
70+
.with_name("<dataset_name>")
71+
.with_entity_id(<entity_id>)
72+
.with_feature_store_id("<feature_store_id>")
73+
.with_description("<dataset_description>")
74+
.with_compartment_id("<compartment_id>")
75+
.with_query('SELECT col FROM <entity_id>.<feature_group_name>')
76+
)
77+
78+
dataset.create()
79+
80+
81+
.. code-tab:: Python3
82+
:caption: Complex SQL
83+
84+
from ads.feature_store.dataset import Dataset
85+
from ads.feature_store.feature_group import FeatureGroup
86+
87+
feature_group = FeatureGroup.from_id("<unique_id>")
88+
89+
dataset = (
90+
Dataset
91+
.with_name("<dataset_name>")
92+
.with_entity_id(<entity_id>)
93+
.with_feature_store_id("<feature_store_id>")
94+
.with_description("<dataset_description>")
95+
.with_compartment_id("<compartment_id>")
96+
.with_query("SELECT col FROM (SELECT col FROM <entity_id>.<feature_group_name> WHERE condition = 'some_condition') AS nested_table;")
97+
.with_feature_groups([feature_group])
98+
)
99+
100+
# Create a dataset
101+
dataset.create()
65102

66103

67104
Load

ads/feature_store/docs/source/feature_group.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ In feature group, three key types play crucial roles:
1111
- **Partition Keys**: These keys assist in distributing and organizing data across different partitions in the feature group. They enable efficient data retrieval by logically grouping related information, optimizing query performance, and minimizing resource utilization.
1212
- **Event Timestamp Keys**: These keys capture the temporal aspect of the data by indicating the time or timestamp associated with a specific event or entry in the feature group. They facilitate time-based querying, trend analysis, and time-series operations.
1313

14+
.. important::
15+
By default, special characters such as spaces and any of the characters ,;{}()\n\t= are not supported in feature group column names.
16+
1417
Define
1518
======
1619

@@ -353,6 +356,17 @@ Feature store provides an API similar to Pandas to join feature groups together,
353356
# Filter feature group
354357
feature_group.filter(feature_group.col1 > 10).show()
355358
359+
# Filter feature group with get_feature
360+
feature_group.filter(feature_group.get_feature("col1") > 10).show()
361+
362+
363+
Get Feature
364+
===========
365+
Use the ``get_feature()`` method of the ``FeatureGroup`` instance to return the description of a feature.
366+
367+
.. code-block:: python3
368+
369+
feature_group.get_feature("col1")
356370
357371
Preview
358372
=======

ads/feature_store/docs/source/transformation.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Transformations in a Feature Store refer to the operations and processes applied
1414
.. code-tab:: Python3
1515
:caption: TransformationMode.SQL
1616

17+
from ads.feature_store.transformation import Transformation, TransformationMode
18+
1719
def transactions_df(transactions_batch):
1820
sql_query = f"select id, cc_num, amount from {transactions_batch}"
1921
return sql_query

ads/feature_store/execution_strategy/spark/spark_execution.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,10 @@ def update_feature_definition_features(self, feature_group, target_table):
366366
try:
367367
# Get the output features
368368
output_features = get_features(
369-
self.spark_engine.get_output_columns_from_table_or_dataframe(table_name=target_table), feature_group.id
369+
self.spark_engine.get_output_columns_from_table_or_dataframe(
370+
table_name=target_table
371+
),
372+
feature_group.id,
370373
)
371374
if output_features:
372375
feature_group._with_features(output_features)
@@ -392,7 +395,9 @@ def update_dataset_features(self, dataset, target_table):
392395
try:
393396
# Get the output features
394397
output_features = get_features(
395-
output_columns=self.spark_engine.get_output_columns_from_table_or_dataframe(table_name=target_table),
398+
output_columns=self.spark_engine.get_output_columns_from_table_or_dataframe(
399+
table_name=target_table
400+
),
396401
parent_id=dataset.id,
397402
entity_type=EntityType.DATASET,
398403
)

ads/feature_store/feature_store.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -367,9 +367,7 @@ def create_entity(
367367
"FeatureStore Resource must be created or saved before creating the entity."
368368
)
369369

370-
self.oci_fs_entity = self._build_entity(
371-
name, description, compartment_id
372-
)
370+
self.oci_fs_entity = self._build_entity(name, description, compartment_id)
373371
return self.oci_fs_entity.create()
374372

375373
def delete_entity(self):

ads/feature_store/transformation.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,13 +346,14 @@ def create(self, **kwargs) -> "Transformation":
346346
if not self.source_code_function:
347347
raise ValueError("Transformation source code function must be provided.")
348348

349+
if not self.transformation_mode:
350+
raise ValueError("Transformation Mode must be provided.")
351+
349352
if not self.name:
350353
self.name = self._transformation_function_name
351354

352355
if self.name != self._transformation_function_name:
353-
raise ValueError(
354-
"Transformation name and function name must be same."
355-
)
356+
raise ValueError("Transformation name and function name must be same.")
356357

357358
payload = deepcopy(self._spec)
358359
payload.pop("id", None)

0 commit comments

Comments
 (0)