@@ -245,6 +245,16 @@ def id(self) -> str:
245245 """
246246 return self .get_spec (self .CONST_ID )
247247
@property
def features(self) -> List[DatasetFeature]:
    """
    Returns the output features of the dataset as ``DatasetFeature`` objects.

    One ``DatasetFeature`` is built per entry of the items list stored under
    the output-feature-details spec; each entry is expanded into keyword
    arguments of the ``DatasetFeature`` constructor.

    Returns:
        List[DatasetFeature]
            The features, or an empty list when the spec or its items are
            missing or empty.
    """
    # Guard both levels: the spec itself may not be populated yet, and a
    # populated spec may carry no items. Either case means "no features".
    feature_details = self.get_spec(self.CONST_OUTPUT_FEATURE_DETAILS) or {}
    feature_dicts = feature_details.get(self.CONST_ITEMS) or []
    return [DatasetFeature(**feature_dict) for feature_dict in feature_dicts]
257+
def with_id(self, id: str) -> "Dataset":
    """
    Stores the given id in the dataset spec.

    Args:
        id: str
            The value recorded under the ``CONST_ID`` spec key.

    Returns:
        Dataset
            Whatever ``set_spec`` returns (annotated as the dataset itself,
            which allows call chaining).
    """
    updated_dataset = self.set_spec(self.CONST_ID, id)
    return updated_dataset
250260
@@ -709,6 +719,28 @@ def delete(self):
709719
710720 dataset_execution_strategy .delete_dataset (self , dataset_job )
711721
def get_features(self) -> List[DatasetFeature]:
    """
    Lists every feature of the dataset.

    Method-style accessor that hands back the value of the ``features``
    property unchanged.

    Returns:
        List[DatasetFeature]
    """
    dataset_features = self.features
    return dataset_features
731+
def get_features_df(self) -> "pandas.DataFrame":
    """
    Returns all the features as pandas dataframe.

    Each feature contributes one row: its ``feature_name`` goes to the
    ``name`` column and its ``feature_type`` to the ``type`` column.

    Returns:
        pandas.DataFrame
            A frame with the columns ``name`` and ``type``. The columns are
            present even when the dataset has no features.
    """
    records = [
        {"name": feature.feature_name, "type": feature.feature_type}
        for feature in self.features
    ]
    # Pin the columns explicitly: with no records pandas cannot infer them
    # and would otherwise return a frame without any columns at all.
    return pandas.DataFrame.from_records(records, columns=["name", "type"])
743+
712744 def update (self , ** kwargs ) -> "Dataset" :
713745 """Updates Dataset in the feature store.
714746
@@ -752,7 +784,18 @@ def _update_from_oci_dataset_model(self, oci_dataset: OCIDataset) -> "Dataset":
752784
753785 for infra_attr , dsc_attr in self .attribute_map .items ():
754786 if infra_attr in dataset_details :
755- self .set_spec (infra_attr , dataset_details [infra_attr ])
787+ if infra_attr == self .CONST_OUTPUT_FEATURE_DETAILS :
788+ # May not need if we fix the backend and add dataset_id to the output_feature
789+ features_list = []
790+ for output_feature in dataset_details [infra_attr ]["items" ]:
791+ output_feature ["datasetId" ] = dataset_details [self .CONST_ID ]
792+ features_list .append (output_feature )
793+
794+ value = {self .CONST_ITEMS : features_list }
795+ else :
796+ value = dataset_details [infra_attr ]
797+
798+ self .set_spec (infra_attr , value )
756799
757800 return self
758801
@@ -792,6 +835,33 @@ def materialise(
792835
793836 dataset_execution_strategy .ingest_dataset (self , dataset_job )
794837
def get_last_job(self) -> "DatasetJob":
    """
    Fetches the job associated with this dataset.

    When the dataset already holds a job id, that job is loaded through
    ``DatasetJob.from_id``. Otherwise ``DatasetJob.list`` is asked for a
    single job of this dataset sorted by ``timeCreated``; the first result
    is recorded on the dataset via ``with_job_id`` and returned.

    Returns:
        DatasetJob

    Raises:
        ValueError
            If the dataset has no id yet, or if no job is found for it.
    """
    if not self.id:
        raise ValueError(
            "Dataset needs to be saved to the feature store before getting associated jobs."
        )

    # A job id already held by the dataset short-circuits the listing call.
    if self.job_id:
        return DatasetJob.from_id(self.job_id)

    job_listing = DatasetJob.list(
        dataset_id=self.id,
        compartment_id=self.compartment_id,
        sort_by="timeCreated",
        limit="1",
    )
    if not job_listing:
        raise ValueError(
            "Unable to retrieve the associated last job. Please make sure you materialized the data."
        )

    latest_job = job_listing[0]
    # Remember the job id so later calls take the short-circuit path above.
    self.with_job_id(latest_job.id)
    return latest_job
864+
795865 @deprecated (details = "preview functionality is deprecated. Please use as_of." )
796866 def preview (
797867 self ,
@@ -947,14 +1017,8 @@ def get_statistics(self, job_id: str = None) -> "Statistics":
9471017 raise ValueError (
9481018 "Dataset needs to be saved to the feature store before retrieving the statistics"
9491019 )
950- stat_job_id = job_id
951- if job_id is None :
952- if self .job_id is None :
953- raise ValueError (
954- "Unable to retrieve the last job,please provide the job id,make sure you materialised the data'"
955- )
956- else :
957- stat_job_id = self .job_id
1020+
1021+ stat_job_id = job_id if job_id is not None else self .get_last_job ().id
9581022
9591023 # TODO: take the one in memory or will list down job ids and find the latest
9601024 dataset_job = DatasetJob .from_id (stat_job_id )
@@ -980,14 +1044,8 @@ def get_validation_output(self, job_id: str = None) -> "ValidationOutput":
9801044 raise ValueError (
9811045 "Dataset needs to be saved to the feature store before retrieving the validation report"
9821046 )
983- validation_job_id = job_id
984- if job_id is None :
985- if self .job_id is None :
986- raise ValueError (
987- "Unable to retrieve the last job,please provide the job id,make sure you materialised the data'"
988- )
989- else :
990- validation_job_id = self .job_id
1047+
1048+ validation_job_id = job_id if job_id is not None else self .get_last_job ().id
9911049
9921050 # retrieve the validation output JSON from data_flow_batch_execution_output
9931051 dataset_job = DatasetJob .from_id (validation_job_id )
0 commit comments