 from ads.feature_store.feature_group import FeatureGroup
 from ads.feature_store.common.enums import TransformationMode
 from tests.integration.feature_store.test_base import FeatureStoreTestCase
+from ads.feature_store.common.spark_session_singleton import SparkSessionSingleton


 class TestFeatureStoreTransformation(FeatureStoreTestCase):
@@ -18,7 +19,7 @@ class TestFeatureStoreTransformation(FeatureStoreTestCase):
     valid_spark_queries = [
         "SELECT requisitionId, length(title) As title_word_count,"
         " CASE When length(title) > 0 Then 0 Else 1 End As empty_title,"
-        " length(description) As description_word_count," \
+        " length(description) As description_word_count,"
         " length(designation) As designation_word_count FROM DATA_SOURCE_INPUT",
         "SELECT user_id, credit_score FROM DATA_SOURCE_INPUT",
         "SELECT country, city, zipcode, state FROM DATA_SOURCE_INPUT WHERE state in ('PR', 'AZ', 'FL') order by state",
@@ -104,7 +105,19 @@ def test_transformation_query_with_feature_group_job(self):
         )
         assert fg.oci_feature_group.id

-        fg.materialise(self.data)
+        # convert pandas to spark dataframe to run SPARK SQL transformation mode
+        spark = SparkSessionSingleton().get_spark_session()
+        spark_df = spark.createDataFrame(self.data)
+        # get item count
+        item_count = spark_df.count()
+        # materialise to delta table
+        fg.materialise(spark_df)
+        # read dataframe
+        df = fg.select().read()
+        # assert dataframe
+        assert df
+        # assert count
+        assert df.count() == item_count

         self.clean_up_feature_group(fg)
         self.clean_up_transformation(transformation)
0 commit comments