
Commit d1007c1 (merge of 2 parents: e333db0 + 8ff17b7)

Commit message: updated release notes

32 files changed: +776 / -609 lines

ads/feature_store/common/enums.py

Lines changed: 1 addition & 0 deletions
@@ -295,6 +295,7 @@ class FeatureType(Enum):
     STRING_BINARY_MAP = "STRING_BINARY_MAP"
     STRING_BOOLEAN_MAP = "STRING_BOOLEAN_MAP"
     UNKNOWN = "UNKNOWN"
+    COMPLEX = "COMPLEX"


 class EntityType(Enum):
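
For orientation, a minimal usage sketch of the new enum member (not part of the commit; it only assumes the import path of the file above):

from ads.feature_store.common.enums import FeatureType

# The new COMPLEX member sits alongside the existing UNKNOWN sentinel.
print(FeatureType.COMPLEX.value)  # "COMPLEX"
print(FeatureType.UNKNOWN.value)  # "UNKNOWN"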

ads/feature_store/common/spark_session_singleton.py

Lines changed: 27 additions & 1 deletion
@@ -9,7 +9,6 @@

 from ads.common.decorator.runtime_dependency import OptionalDependency
 import os
-
 from ads.common.oci_client import OCIClientFactory
 from ads.feature_store.common.utils.utility import get_env_bool

@@ -33,6 +32,31 @@
     raise


+def get_env_bool(env_var: str, default: bool = False) -> bool:
+    """
+    :param env_var: Environment variable name
+    :param default: Default environment variable value
+    :return: Value of the boolean env variable
+    """
+    env_val = os.getenv(env_var)
+    if env_val is None:
+        env_val = default
+    else:
+        env_val = env_val.lower()
+        if env_val == "true":
+            env_val = True
+        elif env_val == "false":
+            env_val = False
+        else:
+            raise ValueError(
+                "For environment variable: {0} only string values T/true or F/false are allowed but: \
+                {1} was provided.".format(
+                    env_var, env_val
+                )
+            )
+    return env_val
+
+
 def developer_enabled():
     return get_env_bool("DEVELOPER_MODE", False)

@@ -76,6 +100,8 @@ def __init__(self, metastore_id: str = None):
                 "spark.hadoop.oracle.dcat.metastore.id", metastore_id
             ).config(
                 "spark.sql.warehouse.dir", metastore.default_managed_table_location
+            ).config(
+                "spark.driver.memory", "16G"
             )

         if developer_enabled():
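
A brief, hedged usage sketch of the get_env_bool helper introduced above (the environment values and assertions are illustrative only, and the import assumes the helper is exposed at module level as shown in the diff):

import os

from ads.feature_store.common.spark_session_singleton import get_env_bool  # assumed import path

os.environ["DEVELOPER_MODE"] = "True"
assert get_env_bool("DEVELOPER_MODE", default=False) is True    # "true"/"false" are matched case-insensitively

os.environ["DEVELOPER_MODE"] = "false"
assert get_env_bool("DEVELOPER_MODE", default=True) is False

del os.environ["DEVELOPER_MODE"]
assert get_env_bool("DEVELOPER_MODE", default=False) is False   # an unset variable falls back to the default

os.environ["DEVELOPER_MODE"] = "yes"
try:
    get_env_bool("DEVELOPER_MODE")
except ValueError:
    pass  # any value other than "true"/"false" raises ValueError

developer_enabled() simply calls this helper for the DEVELOPER_MODE variable with a default of False.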

ads/feature_store/common/utils/feature_schema_mapper.py

Lines changed: 11 additions & 20 deletions
@@ -71,7 +71,7 @@ def map_spark_type_to_feature_type(spark_type):
     if spark_type in spark_type_to_feature_type:
         return spark_type_to_feature_type.get(spark_type)
     else:
-        return FeatureType.UNKNOWN
+        return FeatureType.COMPLEX


 def map_pandas_type_to_feature_type(feature_name, values):
@@ -180,7 +180,7 @@ def map_feature_type_to_spark_type(feature_type):
     if feature_type_in in spark_types:
         return spark_types.get(feature_type_in)
     else:
-        return "UNKNOWN"
+        return "COMPLEX"


 def get_raw_data_source_schema(raw_feature_details: List[dict]):
@@ -225,31 +225,22 @@ def map_feature_type_to_pandas(feature_type):
         FeatureType.INTEGER: "int32",
         FeatureType.DECIMAL: "object",
         FeatureType.DATE: "object",
+        FeatureType.STRING_ARRAY: "object",
+        FeatureType.INTEGER_ARRAY: "object",
+        FeatureType.LONG_ARRAY: "object",
+        FeatureType.FLOAT_ARRAY: "object",
+        FeatureType.DOUBLE_ARRAY: "object",
+        FeatureType.TIMESTAMP_ARRAY: "object",
+        FeatureType.BOOLEAN_ARRAY: "object",
+        # FeatureType.DECIMAL_ARRAY: "object",
+        FeatureType.DATE_ARRAY: "object",
     }
     if feature_type_in in supported_feature_type:
         return supported_feature_type.get(feature_type_in)
     else:
         raise TypeError(f"Feature Type {feature_type} is not supported for pandas")


-def convert_pandas_datatype_with_schema(
-    raw_feature_details: List[dict], input_df: pd.DataFrame
-):
-    feature_detail_map = {}
-    for feature_details in raw_feature_details:
-        feature_detail_map[feature_details.get("name")] = feature_details
-    for column in input_df.columns:
-        if column in feature_detail_map.keys():
-            feature_details = feature_detail_map[column]
-            feature_type = feature_details.get("featureType")
-            pandas_type = map_feature_type_to_pandas(feature_type)
-            input_df[column] = (
-                input_df[column]
-                .astype(pandas_type)
-                .where(pd.notnull(input_df[column]), None)
-            )
-
-
 def map_spark_type_to_stats_data_type(spark_type):
     """Maps the spark data types to MLM library data types
     args:
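
To illustrate the behavioural change in the mappers above, here is a hedged sketch (not from the commit; it assumes a nested struct type has no entry in the explicit lookup table and that the function is importable from the file path shown):

from pyspark.sql.types import ArrayType, StringType, StructField, StructType

from ads.feature_store.common.enums import FeatureType
from ads.feature_store.common.utils.feature_schema_mapper import (  # assumed import path
    map_spark_type_to_feature_type,
)

# A struct containing an array is not a primitive entry in the lookup table,
# so the fallback now yields COMPLEX where it previously yielded UNKNOWN.
nested_type = StructType([StructField("tags", ArrayType(StringType()))])
print(map_spark_type_to_feature_type(nested_type))  # expected: FeatureType.COMPLEX

The added dictionary entries also mean map_feature_type_to_pandas now accepts the array feature types (STRING_ARRAY, INTEGER_ARRAY, and so on) and maps them to the pandas "object" dtype instead of raising TypeError.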

ads/feature_store/common/utils/transformation_query_validator.py

Lines changed: 0 additions & 96 deletions
This file was deleted.
