Commit 6fa792a

Merge branch 'feature/forecast_v2' of https://github.com/oracle/accelerated-data-science into feature/forecast_v2_model_parameters

2 parents: 11d3448 + 1f9d0fa
File tree: 28 files changed, +2288 −41 lines

ads/opctl/operator/cmd.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -161,7 +161,7 @@ def init(
         )
     else:
         overwrite = True
-        output = os.path.join(tempfile.TemporaryDirectory().name, "")
+        output = operator_utils.create_output_folder(name=type + "/")

     # generating operator specification
     operator_config = {}
```
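For context on the replaced line: `tempfile.TemporaryDirectory()` deletes its directory when the object is finalized, so grabbing `.name` without keeping a reference yields a path that may already be gone by the time it is used. A minimal sketch of that behavior (standard library only; the variable names are illustrative):

```python
import os
import tempfile

# Old pattern: no reference to the TemporaryDirectory object is kept,
# so CPython typically finalizes it at once and removes the directory.
output = os.path.join(tempfile.TemporaryDirectory().name, "")
print(os.path.exists(output))  # usually False -- already cleaned up

# Keeping the directory around requires holding a reference,
# or using mkdtemp(), which never auto-deletes.
stable = tempfile.mkdtemp()
print(os.path.exists(stable))  # True
```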

ads/opctl/operator/common/utils.py

Lines changed: 36 additions & 0 deletions

```diff
@@ -26,6 +26,22 @@ class OperatorValidator(Validator):
     pass
 
 
+def create_output_folder(name):
+    output_folder = name
+    protocol = fsspec.utils.get_protocol(output_folder)
+    storage_options = {}
+    if protocol != "file":
+        storage_options = auth or default_signer()
+
+    fs = fsspec.filesystem(protocol, **storage_options)
+    name_suffix = 1
+    while fs.exists(output_folder):
+        name_suffix = name_suffix + 1
+        output_folder = f"{name}_{name_suffix}"
+    fs.mkdirs(output_folder)
+    return output_folder
+
+
 def _build_image(
     dockerfile: str,
     image_name: str,
@@ -156,3 +172,23 @@ def default_signer(**kwargs):
     from ads.common.auth import default_signer
 
     return default_signer(**kwargs)
+
+
+def human_time_friendly(seconds):
+    TIME_DURATION_UNITS = (
+        ("week", 60 * 60 * 24 * 7),
+        ("day", 60 * 60 * 24),
+        ("hour", 60 * 60),
+        ("min", 60),
+    )
+    if seconds == 0:
+        return "inf"
+    accumulator = []
+    for unit, div in TIME_DURATION_UNITS:
+        amount, seconds = divmod(float(seconds), div)
+        if amount > 0:
+            accumulator.append(
+                "{} {}{}".format(int(amount), unit, "" if amount == 1 else "s")
+            )
+    accumulator.append("{} secs".format(round(seconds, 2)))
+    return ", ".join(accumulator)
```
Lines changed: 14 additions & 0 deletions

```yaml
type: anomaly
version: v1
conda_type: published
name: Anomaly Detection Operator
gpu: no
keywords:
  - Anomaly Detection
backends:
  - job
description: |
  Anomaly Detection is the identification of rare items, events, or observations in data that
  differ significantly from the expectation. This can be used for several scenarios like asset
  monitoring, maintenance and prognostic surveillance in industries such as utility,
  aviation and manufacturing.
```
Lines changed: 208 additions & 0 deletions

# Anomaly Detection Operator

Anomaly Detection is the identification of rare items, events, or observations in data that differ significantly from the expectation. This can be used for several scenarios like asset monitoring, maintenance, and prognostic surveillance in industries such as utility, aviation, and manufacturing.

Below are the steps to configure and run the Anomaly Detection Operator on different resources.

## 1. Prerequisites

Follow the [CLI Configuration](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/opctl/configure.html) steps from the ADS documentation. This step is mandatory, as it sets up default values for the options used when running the Anomaly Detection Operator on OCI Data Science jobs or OCI Data Flow applications. If you have previously done this and used a flexible shape, make sure to adjust `ml_job_config.ini` with the shape config details and `docker_registry` information.

- ocpus = 1
- memory_in_gbs = 16
- docker_registry = `<iad.ocir.io/namespace/>`

## 2. Generating configs

To generate starter configs, run the command below. This will create a list of YAML configs and place them in the `output` folder.

```bash
ads operator init -t anomaly --overwrite --output ~/anomaly/
```

The most important files expected to be generated are:

- `anomaly.yaml`: Contains the anomaly detection related configuration.
- `backend_operator_local_python_config.yaml`: Contains a local backend configuration for running anomaly detection in a local environment. The environment should be set up manually before running the operator.
- `backend_operator_local_container_config.yaml`: Contains a local backend configuration for running anomaly detection within a local container. The container should be built before running the operator. Please refer to the instructions below for details on how to accomplish this.
- `backend_job_container_config.yaml`: Contains Data Science job-related config to run anomaly detection in a Data Science job within a container (BYOC) runtime. The container should be built and published before running the operator. Please refer to the instructions below for details on how to accomplish this.
- `backend_job_python_config.yaml`: Contains Data Science job-related config to run anomaly detection in a Data Science job within a conda runtime. The conda environment should be built and published before running the operator.

All generated configurations should be ready to use without any additional adjustments. However, they are provided as starter-kit configurations that can be customized as needed, and they are plain YAML, so they are easy to inspect programmatically, as sketched below.
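As a quick sanity check before editing, the snippet below loads the starter config; the path assumes the `--output ~/anomaly/` folder used above.

```python
import os

import yaml

# Load the starter config produced by `ads operator init`.
path = os.path.expanduser("~/anomaly/anomaly.yaml")
with open(path) as f:
    config = yaml.safe_load(f)

# The top-level structure can vary by operator version, so just pretty-print it.
print(yaml.safe_dump(config, sort_keys=False))
```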
## 3. Running anomaly detection on the local conda environment

To run anomaly detection locally, create and activate a new conda environment (`ads-anomaly`) and install all the required libraries listed in the `environment.yaml` file:

```yaml
- datapane
- cerberus
- oracle-automlx==23.2.3
- "git+https://github.com/oracle/accelerated-data-science.git@feature/anomaly#egg=oracle-ads"
```

Please review the `anomaly.yaml` file generated earlier by the `init` command and make any necessary adjustments to the input and output file locations. By default, it assumes that the files are located in the same folder from which the `init` command was executed.

Use the command below to verify the anomaly detection config.

```bash
ads operator verify -f ~/anomaly/anomaly.yaml
```

Use the following command to run anomaly detection within the `ads-anomaly` conda environment.

```bash
ads operator run -f ~/anomaly/anomaly.yaml -b local
```

The operator will run in your local environment without requiring any additional modifications.
## 4. Running anomaly detection on the local container

To run the anomaly detection operator within a local container, follow these steps:

Use the command below to build the anomaly detection container.

```bash
ads operator build-image -t anomaly
```

This will create a new `anomaly:v1` image, with `/etc/operator` as the designated working directory within the container.

Check the `backend_operator_local_container_config.yaml` config file. By default, it should have a `volume` section with the `.oci` configs folder mounted.

```yaml
volume:
  - "/Users/<user>/.oci:/root/.oci"
```

Mounting the OCI configs folder is only required if an OCI Object Storage bucket will be used to store the input anomaly detection data or the output anomaly detection result. The input/output folders can also be mounted to the container.

```yaml
volume:
  - /Users/<user>/.oci:/root/.oci
  - /Users/<user>/anomaly/data:/etc/operator/data
  - /Users/<user>/anomaly/result:/etc/operator/result
```

The full config can look like:

```yaml
kind: operator.local
spec:
  image: anomaly:v1
  volume:
    - /Users/<user>/.oci:/root/.oci
    - /Users/<user>/anomaly/data:/etc/operator/data
    - /Users/<user>/anomaly/result:/etc/operator/result
type: container
version: v1
```

Run anomaly detection within a container using the command below:

```bash
ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_operator_local_container_config.yaml
```
## 5. Running anomaly detection in the Data Science job within container runtime

To execute the anomaly detection operator within a Data Science job using the container runtime, please follow the steps outlined below:

You can use the following command to build the anomaly detection container. This step can be skipped if you have already done this for running the operator within a local container.

```bash
ads operator build-image -t anomaly
```

This will create a new `anomaly:v1` image, with `/etc/operator` as the designated working directory within the container.

Publish the `anomaly:v1` container to the [Oracle Container Registry](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/home.htm). To become familiar with OCI, read the documentation links posted below.

- [Access Container Registry](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/Concepts/registryoverview.htm#access)
- [Create repositories](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/Tasks/registrycreatingarepository.htm#top)
- [Push images](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/Tasks/registrypushingimagesusingthedockercli.htm#Pushing_Images_Using_the_Docker_CLI)

To publish `anomaly:v1` to OCR, use the command posted below:

```bash
ads operator publish-image anomaly:v1 --registry <iad.ocir.io/tenancy/>
```

After the container is published to OCR, it can be used within the Data Science Jobs service. Check the `backend_job_container_config.yaml` config file. It should contain pre-populated infrastructure and runtime sections. The runtime section should contain an image property, something like `image: iad.ocir.io/<tenancy>/anomaly:v1`. More details about the supported options can be found in the ADS Jobs documentation - [Run a Container](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/run_container.html).

Adjust the `anomaly.yaml` config with the proper input/output folders. When anomaly detection is run in a Data Science job, it will not have access to local folders, so the input data and output folders should be placed in an Object Storage bucket. Open `anomaly.yaml` and adjust the following fields:

```yaml
input_data:
  url: oci://bucket@namespace/anomaly/input_data/data.csv
output_directory:
  url: oci://bucket@namespace/anomaly/result/
test_data:
  url: oci://bucket@namespace/anomaly/input_data/test.csv
```
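Before submitting the job, it can be worth confirming that these Object Storage paths resolve with your configured auth. Below is a minimal sketch that mirrors the `fsspec` + `default_signer()` pattern the operator itself uses internally, assuming `ocifs` is installed; the bucket and namespace values are placeholders from the config above.

```python
import fsspec

from ads.common.auth import default_signer

# Resolve the "oci" protocol with storage options derived from the
# local OCI configuration (same pattern as the operator code).
fs = fsspec.filesystem("oci", **default_signer())

# Placeholder paths -- substitute your real bucket and namespace.
print(fs.exists("oci://bucket@namespace/anomaly/input_data/data.csv"))
print(fs.exists("oci://bucket@namespace/anomaly/result/"))
```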
Run anomaly detection on the Data Science Jobs service using the command posted below:

```bash
ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_job_container_config.yaml
```

The logs can be monitored using the `ads opctl watch` command.

```bash
ads opctl watch <OCID>
```

## 6. Running anomaly detection in the Data Science job within conda runtime

To execute the anomaly detection operator within a Data Science job using the conda runtime, please follow the steps outlined below:

You can use the following command to build the anomaly detection conda environment.

```bash
ads operator build-conda -t anomaly
```

This will create a new `anomaly_v1` conda environment and place it in the folder specified via the `ads opctl configure` command.

Use the command below to publish the `anomaly_v1` conda environment to the Object Storage bucket.

```bash
ads operator publish-conda -t anomaly
```

More details about configuring the CLI can be found here - [Configuring CLI](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/opctl/configure.html)

After the conda environment is published to Object Storage, it can be used within the Data Science Jobs service. Check the `backend_job_python_config.yaml` config file. It should contain pre-populated infrastructure and runtime sections. The runtime section should contain a `conda` section.

```yaml
conda:
  type: published
  uri: oci://bucket@namespace/conda_environments/cpu/anomaly/1/anomaly_v1
```

More details about the supported options can be found in the ADS Jobs documentation - [Run a Python Workload](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/run_python.html).

Adjust the `anomaly.yaml` config with the proper input/output folders. When anomaly detection is run in a Data Science job, it will not have access to local folders, so the input data and output folders should be placed in an Object Storage bucket. Open `anomaly.yaml` and adjust the following fields:

```yaml
input_data:
  url: oci://bucket@namespace/anomaly/input_data/data.csv
output_directory:
  url: oci://bucket@namespace/anomaly/result/
test_data:
  url: oci://bucket@namespace/anomaly/input_data/test.csv
```

Run anomaly detection on the Data Science Jobs service using the command posted below:

```bash
ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_job_python_config.yaml
```

The logs can be monitored using the `ads opctl watch` command.

```bash
ads opctl watch <OCID>
```
Lines changed: 5 additions & 0 deletions

```python
#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
```
Lines changed: 82 additions & 0 deletions

```python
#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
import os
import sys
from typing import Dict, List

import yaml

from ads.opctl import logger
from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
from ads.opctl.operator.common.utils import _parse_input_args

from .model.anomaly_dataset import AnomalyDatasets
from .operator_config import AnomalyOperatorConfig


def operate(operator_config: AnomalyOperatorConfig) -> None:
    """Runs the anomaly detection operator."""
    from .model.factory import AnomalyOperatorModelFactory

    datasets = AnomalyDatasets(operator_config)
    AnomalyOperatorModelFactory.get_model(operator_config, datasets).generate_report()


def verify(spec: Dict, **kwargs: Dict) -> bool:
    """Verifies the anomaly detection operator config."""
    operator = AnomalyOperatorConfig.from_dict(spec)
    msg_header = (
        f"{'*' * 50} The operator config has been successfully verified {'*' * 50}"
    )
    print(msg_header)
    print(operator.to_yaml())
    print("*" * len(msg_header))


def main(raw_args: List[str]):
    """The entry point of the anomaly detection operator."""
    args, _ = _parse_input_args(raw_args)
    if not args.file and not args.spec and not os.environ.get(ENV_OPERATOR_ARGS):
        logger.info(
            "Please specify -f[--file] or -s[--spec] or "
            f"pass operator's arguments via {ENV_OPERATOR_ARGS} environment variable."
        )
        return

    logger.info("-" * 100)
    logger.info(
        f"{'Running' if not args.verify else 'Verifying'} the anomaly detection operator."
    )

    # if the spec is provided as an input string, then convert the string into YAML
    yaml_string = ""
    if args.spec or os.environ.get(ENV_OPERATOR_ARGS):
        operator_spec_str = args.spec or os.environ.get(ENV_OPERATOR_ARGS)
        try:
            yaml_string = yaml.safe_dump(json.loads(operator_spec_str))
        except json.JSONDecodeError:
            yaml_string = yaml.safe_dump(yaml.safe_load(operator_spec_str))
        except:
            yaml_string = operator_spec_str

    operator_config = AnomalyOperatorConfig.from_yaml(
        uri=args.file,
        yaml_string=yaml_string,
    )

    logger.info(operator_config.to_yaml())

    # run the operator
    if args.verify:
        verify(operator_config)
    else:
        operate(operator_config)


if __name__ == "__main__":
    main(sys.argv[1:])
```
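Given the argument handling above, the entry point can also be driven programmatically with an inline spec: the `-s/--spec` branch converts a JSON string to YAML before the config is parsed. A hedged sketch of that usage follows; the module path, the `--verify` flag name, and the spec fields are illustrative assumptions inferred from the code above, not a documented schema.

```python
import json

# Hypothetical module path for the entry-point file shown in this diff.
from ads.opctl.operator.lowcode.anomaly.__main__ import main

spec = {
    "kind": "operator",
    "type": "anomaly",
    "version": "v1",
    "spec": {
        "input_data": {"url": "data.csv"},  # illustrative values
        "output_directory": {"url": "results/"},
    },
}

# --verify is assumed from the `args.verify` usage above: it checks the
# config via verify() instead of running the operator.
main(["-s", json.dumps(spec), "--verify"])
```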
