Feature store allows you to define expectations on data being materialized into a dataset instance. With a ``Dataset`` instance, you can save the expectation details using ``with_expectation_suite()`` with the following parameters:

- ``expectation_suite: ExpectationSuite``. ``ExpectationSuite`` of Great Expectations
- ``expectation_type: ExpectationType``. Type of expectation

  - ``ExpectationType.STRICT``: Fail the job if the expectation is not met
  - ``ExpectationType.LENIENT``: Pass the job even if the expectation is not met
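To make the two modes concrete, here is a minimal, self-contained sketch; the ``ExpectationType`` enum below is a stand-in for illustration, not the feature store's own class:

```python
from enum import Enum

# Illustrative stand-in for the feature store's ExpectationType enum.
class ExpectationType(Enum):
    STRICT = "STRICT"
    LENIENT = "LENIENT"

def job_should_succeed(validation_passed: bool, mode: ExpectationType) -> bool:
    # STRICT: the ingestion job fails whenever validation fails.
    if mode is ExpectationType.STRICT:
        return validation_passed
    # LENIENT: the job passes even when validation fails.
    return True

print(job_should_succeed(False, ExpectationType.STRICT))   # False
print(job_should_succeed(False, ExpectationType.LENIENT))  # True
```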
.. note::

    Great Expectations is a Python-based open-source library for validating, documenting, and profiling your data. It helps you to maintain data quality and improve communication about data between teams. Software developers have long known that automated testing is essential for managing complex codebases.
.. image:: figures/validation.png
.. code-block:: python3

        .with_query(f"SELECT * FROM `{entity_id}`.{feature_group_name}")
        .with_expectation_suite(
            expectation_suite=expectation_suite,
            expectation_type=ExpectationType.STRICT,
        )
    )
You can call the ``get_validation_output()`` method of the Dataset instance to fetch validation results for a specific ingestion job.
The ``get_validation_output()`` method takes the following optional parameter:

- ``job_id: string``. Id of the dataset job
``get_validation_output().to_pandas()`` outputs the validation results for each expectation as a pandas dataframe.
.. image:: figures/dataset_validation_results.png
``get_validation_output().to_summary()`` outputs the overall summary of validation as a pandas dataframe.
.. image:: figures/dataset_validation_summary.png
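As a rough illustration of what the validation output and its summary convey, the snippet below builds a small pandas dataframe; the column names are assumptions for illustration, not the exact schema returned by ``to_pandas()``:

```python
import pandas as pd

# Hypothetical, simplified shape of per-expectation validation results.
results = pd.DataFrame({
    "expectation_type": [
        "expect_column_values_to_not_be_null",
        "expect_column_values_to_be_between",
    ],
    "success": [True, False],
})

# A summary in the spirit of to_summary(): overall pass rate across expectations.
pass_rate = results["success"].mean()
print(pass_rate)  # 0.5
```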
.. seealso::

    :ref:`Feature Validation`
Statistics Computation
======================

During the materialization, feature store performs computation of statistical metrics for all the features by default. This can be configured using the ``StatisticsConfig`` object, which can be passed at the creation of the dataset or updated later.

.. note::

    PyDeequ is a Python API for Deequ, a library built on top of Apache Spark for defining "unit tests for data", which measure data quality in large datasets.

You can call the ``get_statistics()`` method of the Dataset instance to fetch the feature statistics results of a dataset job.
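The kind of per-feature metrics computed during materialization can be sketched with plain pandas; this is an illustration only, not the feature store's implementation:

```python
import pandas as pd

# Toy feature data; the feature names are made up for illustration.
features = pd.DataFrame({
    "age": [25, 32, 47, 32],
    "income": [40.0, 52.0, 61.0, 47.0],
})

# Metrics of the kind a feature store computes per feature.
stats = features.agg(["count", "mean", "min", "max"])
print(stats.loc["mean", "age"])  # 34.0
```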
.. ads/feature_store/docs/source/feature_group.rst
With a ``FeatureGroup`` instance, you can save the expectation details using ``with_expectation_suite()``.
You can call the ``get_validation_output()`` method of the FeatureGroup instance to fetch validation results for a specific ingestion job.
The ``get_validation_output()`` method takes the following optional parameter:

- ``job_id: string``. Id of the feature group job
``get_validation_output().to_pandas()`` outputs the validation results for each expectation as a pandas dataframe.
.. image:: figures/validation_results.png
``get_validation_output().to_summary()`` outputs the overall summary of validation as a pandas dataframe.
.. image:: figures/validation_summary.png
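One common use of the results dataframe is isolating the expectations that failed. The sketch below assumes a simplified shape with hypothetical column names, not the exact schema of ``to_pandas()``:

```python
import pandas as pd

# Assumed, simplified shape of per-expectation validation results.
results = pd.DataFrame({
    "column": ["user_id", "email", "age"],
    "success": [True, False, True],
})

# Keep only the rows where the expectation was not met.
failed = results[~results["success"]]
print(list(failed["column"]))  # ['email']
```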
.. seealso::

    :ref:`Feature Validation`
Statistics Computation
======================

During the materialization, feature store performs computation of statistical metrics for all the features by default. This can be configured using the ``StatisticsConfig`` object, which can be passed at the creation of the feature group, or it can be updated later as well.
.. code-block:: python3

    # Define statistics configuration for selected features
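As a loose, hypothetical sketch of what such a configuration expresses, the snippet below uses a plain dict; the key names are assumptions for illustration, not the actual ``StatisticsConfig`` API:

```python
# Hypothetical sketch only: key names are assumptions, not the real API.
statistics_config = {
    "is_enabled": True,            # toggle metric computation at materialization
    "columns": ["age", "income"],  # restrict statistics to selected features
}
print(statistics_config["is_enabled"])  # True
```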
Feature validation is the process of checking the quality and accuracy of the features used in a machine learning model. This is important because features that are not accurate or reliable can lead to poor model performance.

Feature store allows you to define expectations on the data that is being materialized into a feature group or dataset. This is achieved using the open source library Great Expectations.

.. note::

    `Great Expectations <https://docs.greatexpectations.io/docs/0.15.50/>`_ is a Python-based open-source library for validating, documenting, and profiling your data. It helps you to maintain data quality and improve communication about data between teams. Software developers have long known that automated testing is essential for managing complex codebases.
Expectations
============

An Expectation is a verifiable assertion about your data. You can define an expectation as below:

.. code-block:: python3

    from great_expectations.core.expectation_configuration import ExpectationConfiguration
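Conceptually, each expectation pairs an expectation type with keyword arguments. The sketch below mirrors that declarative shape in plain Python; the column name ``feature_a`` is a made-up placeholder, and the checker is a minimal stand-in, not Great Expectations itself:

```python
# Conceptual sketch: an expectation as declarative data.
expectation = {
    "expectation_type": "expect_column_values_to_not_be_null",
    "kwargs": {"column": "feature_a"},  # hypothetical column name
}

def values_not_null(values):
    # Minimal stand-in for the not-null expectation's semantics.
    return all(v is not None for v in values)

print(values_not_null([1, 2, 3]))   # True
print(values_not_null([1, None]))   # False
```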