11#!/usr/bin/env python
22# -*- coding: utf-8; -*-
3- import json
43import logging
54from copy import deepcopy
65from datetime import datetime
76from typing import Dict , List , Union
87
98import pandas
9+ import pandas as pd
1010from great_expectations .core import ExpectationSuite
1111
1212from ads import deprecated
@@ -159,6 +159,7 @@ def __init__(self, spec: Dict = None, **kwargs) -> None:
159159 super ().__init__ (spec = spec , ** deepcopy (kwargs ))
160160 # Specify oci Dataset instance
161161 self .dataset_job = None
162+ self ._is_manual_association : bool = False
162163 self ._spark_engine = None
163164 self .oci_dataset = self ._to_oci_dataset (** kwargs )
164165 self .lineage = OCILineage (** kwargs )
@@ -191,6 +192,16 @@ def spark_engine(self):
191192 self ._spark_engine = SparkEngine (get_metastore_id (self .feature_store_id ))
192193 return self ._spark_engine
193194
@property
def is_manual_association(self):
    """Report whether the feature group association was set manually.

    The flag stored on the feature group collection in the spec takes
    precedence. When the spec holds no (truthy) collection, or the
    collection's own flag is ``None``, the instance-level
    ``_is_manual_association`` value is returned instead.

    Returns
    -------
    The collection's ``is_manual_association`` value when it is set,
    otherwise ``self._is_manual_association``.
    """
    fg_collection: "DatasetFeatureGroupCollection" = self.get_spec(
        self.CONST_FEATURE_GROUP
    )

    # Guard clause: nothing usable on the spec, so fall back to the
    # flag tracked on the instance itself.
    if not fg_collection or fg_collection.is_manual_association is None:
        return self._is_manual_association

    return fg_collection.is_manual_association
204+
194205 @property
195206 def kind (self ) -> str :
196207 """The kind of the object as showing in a YAML."""
@@ -572,8 +583,18 @@ def with_feature_groups(self, feature_groups: List["FeatureGroup"]) -> "Dataset"
572583 for group in feature_groups :
573584 collection .append (DatasetFeatureGroupSummary (feature_group_id = group .id ))
574585
586+ self ._is_manual_association = True
575587 return self .set_spec (
576- self .CONST_FEATURE_GROUP , DatasetFeatureGroupCollection (items = collection )
588+ self .CONST_FEATURE_GROUP ,
589+ DatasetFeatureGroupCollection (items = collection , is_manual_association = True ),
590+ )
591+
def feature_groups_to_df(self):
    """Build a pandas DataFrame describing this dataset's feature groups.

    Each entry of ``self.feature_groups`` contributes one record, taken
    from ``oci_feature_group.to_df_record()`` on that entry.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``pd.DataFrame.from_records`` from the
        collected records, in the iteration order of
        ``self.feature_groups``.
    """
    records = []
    for group in self.feature_groups:
        records.append(group.oci_feature_group.to_df_record())

    return pd.DataFrame.from_records(records)
578599
579600 @property
@@ -687,7 +708,7 @@ def show(self, rankdir: str = GraphOrientation.LEFT_RIGHT) -> None:
687708 f"Can't get lineage information for Feature group id { self .id } "
688709 )
689710
690- def create (self , ** kwargs ) -> "Dataset" :
711+ def create (self , validate_sql = True , ** kwargs ) -> "Dataset" :
691712 """Creates dataset resource.
692713
693714 !!! note "Lazy"
@@ -700,6 +721,8 @@ def create(self, **kwargs) -> "Dataset":
700721 kwargs
701722 Additional kwargs arguments.
702723 Can be any attribute that `oci.feature_store.models.Dataset` accepts.
724+ validate_sql:
725+ Boolean value indicating whether to validate sql before creating dataset
703726
704727 Returns
705728 -------
@@ -720,6 +743,9 @@ def create(self, **kwargs) -> "Dataset":
720743 if self .statistics_config is None :
721744 self .statistics_config = StatisticsConfig ()
722745
746+ if validate_sql is True :
747+ self .spark_engine .sql (self .get_spec (self .CONST_QUERY ))
748+
723749 payload = deepcopy (self ._spec )
724750 payload .pop ("id" , None )
725751 logger .debug (f"Creating a dataset resource with payload { payload } " )
0 commit comments