Skip to content

Commit 62840d3

Browse files
committed
Adding feature store beta service
1 parent 7a8425c commit 62840d3

File tree

94 files changed

+16052
-5
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+16052
-5
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ repos:
88
- id: check-json
99
- id: check-merge-conflict
1010
- id: check-yaml
11+
exclude: feature_store_.*\.yaml  # pre-commit matches this as a Python regex, not a glob
1112
args: ['--allow-multiple-documents']
1213
- id: detect-private-key
1314
- id: end-of-file-fixer
@@ -24,4 +25,4 @@ repos:
2425
- id: rst-backticks
2526
- id: rst-inline-touching-normal
2627

27-
exclude: ^(docs/)
28+
exclude: ^(docs/)

ads/common/decorator/runtime_dependency.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class OptionalDependency:
6464
OPTUNA = "oracle-ads[optuna]"
6565
SPARK = "oracle-ads[spark]"
6666
HUGGINGFACE = "oracle-ads[huggingface]"
67+
GREAT_EXPECTATIONS = "oracle-ads[great-expectations]"
68+
PYDEEQU = "oracle-ads[pydeequ]"
69+
GRAPHVIZ = "oracle-ads[graphviz]"
6770

6871

6972
def runtime_dependency(

ads/common/oci_client.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8; -*-
33

4-
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
4+
# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

77
import logging
88

99
from oci.ai_language import AIServiceLanguageClient
10+
from oci.data_catalog import DataCatalogClient
1011
from oci.data_flow import DataFlowClient
1112
from oci.data_labeling_service import DataLabelingManagementClient
1213
from oci.data_labeling_service_dataplane import DataLabelingClient
1314
from oci.data_science import DataScienceClient
1415
from oci.identity import IdentityClient
1516
from oci.object_storage import ObjectStorageClient
17+
from oci.resource_search import ResourceSearchClient
1618
from oci.secrets import SecretsClient
1719
from oci.vault import VaultsClient
20+
from oci.feature_store import FeatureStoreClient
1821

1922
logger = logging.getLogger(__name__)
2023

@@ -62,6 +65,9 @@ def _client_impl(self, client):
6265
"ai_language": AIServiceLanguageClient,
6366
"data_labeling_dp": DataLabelingClient,
6467
"data_labeling_cp": DataLabelingManagementClient,
68+
"feature_store": FeatureStoreClient,
69+
"resource_search": ResourceSearchClient,
70+
"data_catalog": DataCatalogClient
6571
}
6672

6773
assert (
@@ -124,6 +130,18 @@ def ai_language(self):
124130
def data_labeling_cp(self):
125131
return self.create_client("data_labeling_cp")
126132

133+
@property
134+
def feature_store(self):
135+
return self.create_client("feature_store")
136+
127137
@property
128138
def data_labeling_dp(self):
129139
return self.create_client("data_labeling_dp")
140+
141+
@property
142+
def resource_search(self):
143+
return self.create_client("resource_search")
144+
145+
@property
146+
def data_catalog(self):
147+
return self.create_client("data_catalog")

ads/dataset/progress.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def __init__(self, max_progress=100, description="Running", verbose=False):
4141
self.start_time = time.time()
4242
self.description = description
4343

44-
def update(self, description=None):
44+
def update(self, description=None, n=1):
4545
"""
4646
Updates the progress bar
4747
"""
@@ -57,7 +57,7 @@ def update(self, description=None):
5757
description if description is not None else self.description
5858
)
5959
self.start_time = time.time()
60-
self.progress_bar.update(1)
60+
self.progress_bar.update(n)
6161
if description is not None:
6262
self.progress_bar.set_description(description, refresh=True)
6363

ads/feature_store/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8; -*-
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
import logging
8+
9+
logger = logging.getLogger(__name__)

ads/feature_store/common/__init__.py

Whitespace-only changes.

ads/feature_store/common/enums.py

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8; -*-
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
from enum import Enum
8+
9+
10+
class JobStatus(Enum):
11+
"""
12+
An enumeration that represents the supported Job status.
13+
14+
Attributes:
15+
SUCCEEDED (str): A string representation of the state of Succeeded job.
16+
FAILED (str): A string representation of the state of Failed job.
17+
CODE_EXECUTION (str): A string representation of the state of CodeExecution job.
18+
19+
Methods:
20+
None
21+
"""
22+
23+
SUCCEEDED = "SUCCEEDED"
24+
FAILED = "FAILED"
25+
CODE_EXECUTION = "CODE_EXECUTION"
26+
27+
28+
class LevelType(Enum):
29+
"""
30+
An enumeration defining the different types of logging levels.
31+
32+
Attributes:
33+
ERROR (str): A string representing the highest logging level, indicating an error in the program.
34+
WARNING (str): A string representing a lower logging level, indicating a potential issue or warning in the program.
35+
"""
36+
37+
ERROR = "ERROR"
38+
WARNING = "WARNING"
39+
40+
41+
class DatasetIngestionMode(Enum):
42+
"""
43+
An enumeration defining the possible modes for ingesting datasets.
44+
45+
Attributes:
46+
SQL (str): A string representing the SQL mode, which is used to ingest datasets using SQL.
47+
"""
48+
49+
SQL = "SQL"
50+
51+
52+
class IngestionMode(Enum):
53+
"""
54+
An enumeration that represents the supported Ingestion Mode in feature store.
55+
56+
Attributes:
57+
OVERWRITE (str): Ingestion mode to overwrite the data in the system.
58+
APPEND (str): Ingestion mode to append the data in the system.
59+
UPSERT (str): Ingestion mode to insert and update the data in the system.
60+
61+
Methods:
62+
None
63+
"""
64+
65+
OVERWRITE = "OVERWRITE"
66+
APPEND = "APPEND"
67+
DEFAULT = "DEFAULT"
68+
UPSERT = "UPSERT"
69+
70+
71+
class JoinType(Enum):
72+
"""Enumeration of supported SQL join types.
73+
74+
Attributes:
75+
INNER: Inner join.
76+
LEFT: Left join.
77+
RIGHT: Right join.
78+
FULL: Full outer join.
79+
CROSS: Cross join.
80+
LEFT_SEMI_JOIN: Left semi join.
81+
"""
82+
83+
INNER = "INNER"
84+
LEFT = "LEFT"
85+
RIGHT = "RIGHT"
86+
FULL = "FULL"
87+
CROSS = "CROSS"
88+
LEFT_SEMI_JOIN = "LEFT_SEMI_JOIN"
89+
90+
91+
class ExecutionEngine(Enum):
92+
"""
93+
An enumeration that represents the supported execution engines.
94+
95+
Attributes:
96+
SPARK (str): A string representation of the Apache Spark execution engine.
97+
PANDAS (str): A string representation of the Pandas execution engine.
98+
99+
Methods:
100+
None
101+
"""
102+
103+
SPARK = "SPARK"
104+
PANDAS = "PANDAS"
105+
106+
107+
class DataFrameType(Enum):
108+
"""
109+
An enumeration that represents the supported DataFrame types.
110+
111+
Attributes:
112+
SPARK (str): A string representation of the Spark DataFrame type.
113+
PANDAS (str): A string representation of the pandas DataFrame type.
114+
115+
Methods:
116+
None
117+
"""
118+
119+
SPARK = "SPARK"
120+
PANDAS = "PANDAS"
121+
122+
123+
class ValidationEngineType(Enum):
124+
"""
125+
An enumeration that represents the supported validation engines.
126+
127+
Attributes:
128+
GREAT_EXPECTATIONS (str): A string representation of the Great Expectations validation engine.
129+
130+
Methods:
131+
None
132+
"""
133+
134+
GREAT_EXPECTATIONS = "GREAT_EXPECTATIONS"
135+
136+
137+
class FeatureStoreJobType(Enum):
138+
"""
139+
An enumeration that represents the Job type.
140+
141+
Attributes:
142+
FEATURE_GROUP_INGESTION (str): A string representing that job is feature group ingestion.
143+
DATASET_INGESTION (str): A string representing that job is dataset ingestion.
144+
FEATURE_GROUP_DELETION (str): A string representing that job is feature group deletion.
145+
DATASET_DELETION (str): A string representing that job is dataset deletion.
146+
147+
Methods:
148+
None
149+
"""
150+
151+
FEATURE_GROUP_INGESTION = "FEATURE_GROUP_INGESTION"
152+
DATASET_INGESTION = "DATASET_INGESTION"
153+
FEATURE_GROUP_DELETION = "FEATURE_GROUP_DELETION"
154+
DATASET_DELETION = "DATASET_DELETION"
155+
156+
157+
class LifecycleState(Enum):
158+
"""
159+
An enumeration that represents the lifecycle state of feature store resources.
160+
161+
Attributes:
162+
ACTIVE (str): A string representing Active resource.
163+
FAILED (str): A string representing Failed resource.
164+
NEEDS_ATTENTION (str): A string representing a resource that needs attention.
165+
166+
Methods:
167+
None
168+
"""
169+
170+
ACTIVE = "ACTIVE"
171+
FAILED = "FAILED"
172+
NEEDS_ATTENTION = "NEEDS_ATTENTION"
173+
174+
175+
class JobConfigurationType(Enum):
176+
"""
177+
An enumeration defining the different types of job configuration modes for Spark.
178+
179+
Attributes:
180+
SPARK_BATCH_AUTOMATIC (str): A string representing automatic job configuration mode for Spark Batch jobs.
181+
SPARK_BATCH_MANUAL (str): A string representing manual job configuration mode for Spark Batch jobs.
182+
"""
183+
184+
SPARK_BATCH_AUTOMATIC = "SPARK_BATCH_AUTOMATIC"
185+
SPARK_BATCH_MANUAL = "SPARK_BATCH_MANUAL"
186+
187+
188+
class ExpectationType(Enum):
189+
"""
190+
An enumeration of the available expectation types for a feature store.
191+
192+
Attributes:
193+
STRICT (str): A strict expectation type.
194+
LENIENT (str): A lenient expectation type.
195+
NO_EXPECTATION (str): A no expectation type.
196+
197+
Methods:
198+
None
199+
"""
200+
201+
STRICT = "STRICT"
202+
LENIENT = "LENIENT"
203+
NO_EXPECTATION = "NO_EXPECTATION"
204+
205+
206+
class TransformationMode(Enum):
207+
"""
208+
An enumeration defining the different modes for data transformation.
209+
210+
Attributes:
211+
SQL (str): A string representing the SQL mode, which is used to transform data using SQL queries.
212+
PANDAS (str): A string representing the Pandas mode, which is used to transform data using the Pandas library.
213+
"""
214+
215+
SQL = "sql"
216+
PANDAS = "pandas"
217+
218+
219+
class FilterOperators(Enum):
220+
"""
221+
An enumeration defining the different comparison operators for data filtering.
222+
223+
Attributes:
224+
GE (str): A string representing the greater than or equal to operator.
225+
GT (str): A string representing the greater than operator.
226+
NE (str): A string representing the not equals operator.
227+
EQ (str): A string representing the equals operator.
228+
LE (str): A string representing the less than or equal to operator.
229+
LT (str): A string representing the less than operator.
230+
IN (str): A string representing the in operator.
231+
LK (str): A string representing the like operator.
232+
"""
233+
234+
GE = "GREATER_THAN_OR_EQUAL"
235+
GT = "GREATER_THAN"
236+
NE = "NOT_EQUALS"
237+
EQ = "EQUALS"
238+
LE = "LESS_THAN_OR_EQUAL"
239+
LT = "LESS_THAN"
240+
IN = "IN"
241+
LK = "LIKE"
242+
243+
244+
class FeatureType(Enum):
245+
"""
246+
An enumeration of the available feature types for a feature store.
247+
248+
Attributes:
249+
STRING (str): A string feature type.
250+
INTEGER (str): An integer feature type.
251+
FLOAT (str): A float feature type.
252+
DOUBLE (str): A double feature type.
253+
BOOLEAN (str): A boolean feature type.
254+
DATE (str): A date feature type.
255+
TIMESTAMP (str): A timestamp feature type.
256+
DECIMAL (str): A decimal feature type.
257+
BINARY (str): A binary feature type.
258+
ARRAY (str): An array feature type.
259+
MAP (str): A map feature type.
260+
STRUCT (str): A struct feature type.
261+
"""
262+
263+
STRING = "STRING"
264+
INTEGER = "INTEGER"
265+
FLOAT = "FLOAT"
266+
DOUBLE = "DOUBLE"
267+
BOOLEAN = "BOOLEAN"
268+
DATE = "DATE"
269+
TIMESTAMP = "TIMESTAMP"
270+
DECIMAL = "DECIMAL"
271+
BINARY = "BINARY"
272+
ARRAY = "ARRAY"
273+
MAP = "MAP"
274+
STRUCT = "STRUCT"
275+
276+
277+
class EntityType(Enum):
278+
"""
279+
An enumeration of the supported entity types.
280+
281+
Attributes:
282+
FEATURE_GROUP (str): A string representing the feature group.
283+
DATASET (str): A string representing the dataset.
284+
"""
285+
286+
FEATURE_GROUP = "FEATURE_GROUP"
287+
DATASET = "DATASET"

0 commit comments

Comments
 (0)