1010# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111# ANY KIND, either express or implied. See the License for the specific
1212# language governing permissions and limitations under the License.
13- """The FeatureGroup entity for FeatureStore."""
13+ """The FeatureGroup entity for FeatureStore.
14+
15+ A feature group is a logical grouping of features, defined in the Feature Store,
16+ to describe records. A feature group definition is composed of a list of feature definitions,
17+ a record identifier name, and configurations for its online and offline store.
18+ The create, describe, update, delete, and list feature group APIs can be used to
19+ manage feature groups.
20+ """
21+
1422from __future__ import absolute_import
1523
1624import logging
4553
4654@attr .s
4755class AthenaQuery :
48- """Class to manager querying of feature store data with AWS Athena
56+ """Class to manage querying of feature store data with AWS Athena.
57+
58+ This class instantiates an AthenaQuery object that is used to retrieve data from feature store
59+ via standard SQL queries.
4960
5061 Attributes:
5162 catalog (str): name of the data catalog.
@@ -63,12 +74,15 @@ class AthenaQuery:
6374 _result_file_prefix : str = attr .ib (init = False , default = None )
6475
6576 def run (self , query_string : str , output_location : str , kms_key : str = None ) -> str :
66- """Run athena query with the given query_string
77+ """Execute a SQL query given a query string, output location and kms key.
78+
79+ This method executes the SQL query using Athena and outputs the results to output_location
80+ and returns the execution id of the query.
6781
6882 Args:
6983 query_string: SQL query string.
70- output_location: s3 uri of the query result.
71- kms_key: KMS key id, if set will be used to encrypt the query result file.
84+ output_location: S3 URI of the query result.
85+ kms_key: KMS key id. If set, will be used to encrypt the query result file.
7286
7387 Returns:
7488 Execution id of the query.
@@ -103,7 +117,7 @@ def get_query_execution(self) -> Dict[str, Any]:
103117 )
104118
105119 def as_dataframe (self ) -> DataFrame :
106- """Download the result of the current query and load it into a DataFrame
120+ """Download the result of the current query and load it into a DataFrame.
107121
108122 Returns:
109123 A pandas DataFrame containing the query result.
@@ -132,10 +146,12 @@ def as_dataframe(self) -> DataFrame:
132146class IngestionManagerPandas :
133147 """Class to manage the multi-threaded data ingestion process.
134148
149+ This class will manage the data ingestion process which is multi-threaded.
150+
135151 Attributes:
136152 feature_group_name (str): name of the Feature Group.
137153 sagemaker_session (Session): instance of the Session class to perform boto calls.
138- data_frame (DataFrame): pandas data_frame to be ingested to the given feature group.
154+ data_frame (DataFrame): pandas DataFrame to be ingested to the given feature group.
139155 max_workers (int): number of threads to create.
140156 """
141157
@@ -201,9 +217,8 @@ def run(self, wait=True, timeout=None):
201217 Args:
202218 wait (bool): whether to wait for the ingestion to finish or not.
203219 timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
204- if timeout is reached.
220+ if timeout is reached.
205221 """
206-
207222 executor = ThreadPoolExecutor (max_workers = self .max_workers )
208223 batch_size = math .ceil (self .data_frame .shape [0 ] / self .max_workers )
209224
@@ -230,7 +245,10 @@ def run(self, wait=True, timeout=None):
230245
231246@attr .s
232247class FeatureGroup :
233- """FeatureGroup for FeatureStore
248+ """FeatureGroup definition.
249+
250+ This class instantiates a FeatureGroup object that comprises a name for the FeatureGroup,
251+ session instance, and a list of feature definition objects i.e., FeatureDefinition.
234252
235253 Attributes:
236254 name (str): name of the FeatureGroup instance.
@@ -282,7 +300,7 @@ def create(
282300 description : str = None ,
283301 tags : List [Dict [str , str ]] = None ,
284302 ) -> Dict [str , Any ]:
285- """Creates a SageMaker FeatureStore FeatureGroup
303+ """Create a SageMaker FeatureStore FeatureGroup.
286304
287305 Args:
288306 s3_uri (str): S3 URI of the offline store.
@@ -300,7 +318,6 @@ def create(
300318 Returns:
301319 Response dict from service.
302320 """
303-
304321 create_feature_store_args = dict (
305322 feature_group_name = self .name ,
306323 record_identifier_name = record_identifier_name ,
@@ -336,7 +353,7 @@ def create(
336353 return self .sagemaker_session .create_feature_group (** create_feature_store_args )
337354
338355 def delete (self ):
339- """Deletes a FeatureGroup"""
356+ """Delete a FeatureGroup."""
340357 self .sagemaker_session .delete_feature_group (feature_group_name = self .name )
341358
342359 def describe (self , next_token : str = None ) -> Dict [str , Any ]:
@@ -354,7 +371,7 @@ def load_feature_definitions(
354371 self ,
355372 data_frame : DataFrame ,
356373 ) -> Sequence [FeatureDefinition ]:
357- """Loads feature definitions from a Pandas DataFrame
374+ """Load feature definitions from a Pandas DataFrame.
358375
359376 Column name is used as feature name. Feature type is inferred from the dtype
360377 of the column. Dtype int_, int8, int16, int32, int64, uint8, uint16, uint32
@@ -389,7 +406,7 @@ def load_feature_definitions(
389406 return self .feature_definitions
390407
391408 def put_record (self , record : Sequence [FeatureValue ]):
392- """Puts a single record in the FeatureGroup
409+ """Put a single record in the FeatureGroup.
393410
394411 Args:
395412 record (Sequence[FeatureValue]): a list containing feature values.
@@ -430,7 +447,7 @@ def ingest(
430447 return manager
431448
432449 def athena_query (self ) -> AthenaQuery :
433- """Creates an AthenaQuery instance
450+ """Create an AthenaQuery instance.
434451
435452 Returns:
436453 An instance of AthenaQuery initialized with data catalog configurations.
@@ -449,10 +466,11 @@ def athena_query(self) -> AthenaQuery:
449466 raise RuntimeError ("No metastore is configured with this feature group." )
450467
451468 def as_hive_ddl (self , database : str = "sagemaker_featurestore" , table_name : str = None ) -> str :
452- """Generate DDL can be used to create Hive table
469+ """Generate Hive DDL commands that can be used to define or change the structure of tables
470+ or databases in Hive.
453471
454472 Schema of the table is generated based on the feature definitions. Columns are named
455- after feature name and data-type are infered based on feature type. Integral feature
473+ after feature name and data-type are inferred based on feature type. Integral feature
456474 type is mapped to INT data-type. Fractional feature type is mapped to FLOAT data-type.
457475 String feature type is mapped to STRING data-type.
458476
@@ -464,7 +482,6 @@ def as_hive_ddl(self, database: str = "sagemaker_featurestore", table_name: str
464482 Returns:
465483 Generated create table DDL string.
466484 """
467-
468485 if not table_name :
469486 table_name = self .name
470487
0 commit comments