Skip to content

Commit a0b5e85

Browse files
authored
added validation data metrics and updated report, inliers, outliers generation (#526)
2 parents c0a9a5f + a1f3a56 commit a0b5e85

File tree

9 files changed

+328
-61
lines changed

9 files changed

+328
-61
lines changed

ads/opctl/operator/lowcode/anomaly/const.py

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,32 @@ class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
4848
UNSUPERVISED_UNIFY95 = "unsupervised_unify95"
4949
UNSUPERVISED_UNIFY95_LOG_LOSS = "unsupervised_unify95_log_loss"
5050
UNSUPERVISED_N1_EXPERTS = "unsupervised_n-1_experts"
51-
51+
RECALL = "Recall"
52+
PRECISION = "Precision"
53+
ACCURACY = "Accuracy"
54+
F1_SCORE = "f1_score"
55+
FP = "False Positive"
56+
FN = "False Negative"
57+
TP = "True Positive"
58+
TN = "True Negative"
59+
ROC_AUC = "ROC_AUC"
60+
PRC_AUC = "PRC_AUC"
61+
MCC = "MCC"
62+
MEAN_RECALL = "Mean Recall"
63+
MEAN_PRECISION = "Mean Precision"
64+
MEAN_ACCURACY = "Mean Accuracy"
65+
MEAN_F1_SCORE = "Mean f1_score"
66+
MEAN_ROC_AUC = "Mean ROC_AUC"
67+
MEAN_PRC_AUC = "Mean PRC_AUC"
68+
MEAN_MCC = "Mean MCC"
69+
MEDIAN_RECALL = "Median Recall"
70+
MEDIAN_PRECISION = "Median Precision"
71+
MEDIAN_ACCURACY = "Median Accuracy"
72+
MEDIAN_F1_SCORE = "Median f1_score"
73+
MEDIAN_ROC_AUC = "Median ROC_AUC"
74+
MEDIAN_PRC_AUC = "Median PRC_AUC"
75+
MEDIAN_MCC = "Median MCC"
76+
ELAPSED_TIME = "Elapsed Time"
5277

5378
class OutputColumns(str, metaclass=ExtendedEnumMeta):
5479
ANOMALY_COL = "anomaly"

ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -69,8 +69,9 @@ def _load_data(self, spec):
6969

7070

7171
class AnomalyOutput:
72-
def __init__(self):
72+
def __init__(self, date_column):
7373
self.category_map = dict()
74+
self.date_column = date_column
7475

7576
def add_output(self, category: str, anomalies: pd.DataFrame, scores: pd.DataFrame):
7677
self.category_map[category] = (anomalies, scores)
@@ -83,15 +84,29 @@ def get_scores_by_cat(self, category: str):
8384

8485
def get_inliers_by_cat(self, category: str, data: pd.DataFrame):
8586
anomaly = self.get_anomalies_by_cat(category)
87+
scores = self.get_scores_by_cat(category)
8688
inlier_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 0]
87-
88-
return data.iloc[inlier_indices]
89+
inliers = data.iloc[inlier_indices]
90+
if scores is not None and not scores.empty:
91+
inliers = pd.merge(
92+
inliers,
93+
scores,
94+
on=self.date_column,
95+
how='inner')
96+
return inliers
8997

9098
def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
9199
anomaly = self.get_anomalies_by_cat(category)
100+
scores = self.get_scores_by_cat(category)
92101
outliers_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 1]
93-
94-
return data.iloc[outliers_indices]
102+
outliers = data.iloc[outliers_indices]
103+
if scores is not None and not scores.empty:
104+
outliers = pd.merge(
105+
outliers,
106+
scores,
107+
on=self.date_column,
108+
how='inner')
109+
return outliers
95110

96111
def get_inliers(self, full_data_dict):
97112
inliers = pd.DataFrame()
@@ -128,3 +143,6 @@ def get_scores(self, target_category_columns):
128143
score[target_category_columns[0]] = category
129144
scores = pd.concat([scores, score], axis=0, ignore_index=True)
130145
return scores
146+
147+
def get_num_anomalies_by_cat(self, category: str):
148+
return (self.category_map[category][0][OutputColumns.ANOMALY_COL] == 1).sum()

ads/opctl/operator/lowcode/anomaly/model/automlx.py

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import pandas as pd
88

99
from ads.common.decorator.runtime_dependency import runtime_dependency
10+
from .anomaly_dataset import AnomalyOutput
1011

1112
from .base_model import AnomalyOperatorBaseModel
1213
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
@@ -22,11 +23,34 @@ class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
2223
),
2324
)
2425
def _build_model(self) -> pd.DataFrame:
25-
est = automl.Pipeline(task='anomaly_detection')
26+
27+
28+
date_column = self.spec.datetime_column.name
2629
dataset = self.datasets
27-
est.fit(dataset.data, y=None)
28-
y_pred = est.predict(dataset.data)
29-
dataset.data[OutputColumns.ANOMALY_COL] = y_pred
30+
31+
full_data_dict = dataset.full_data_dict
32+
33+
anomaly_output = AnomalyOutput(date_column=date_column)
34+
35+
# Iterate over the full_data_dict items
36+
for target, df in full_data_dict.items():
37+
est = automl.Pipeline(task='anomaly_detection')
38+
est.fit(df, y=None)
39+
y_pred = est.predict(df)
40+
scores = est.predict_proba(df)
41+
42+
anomaly = pd.DataFrame({
43+
date_column: df[date_column],
44+
OutputColumns.ANOMALY_COL: y_pred
45+
})
46+
score = pd.DataFrame({
47+
date_column: df[date_column],
48+
OutputColumns.SCORE_COL: [item[1] for item in scores]
49+
})
50+
anomaly_output.add_output(target, anomaly, score)
51+
52+
return anomaly_output
53+
3054

3155
def _generate_report(self):
3256
import datapane as dp

ads/opctl/operator/lowcode/anomaly/model/autots.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -48,13 +48,7 @@ def _build_model(self) -> AnomalyOutput:
4848

4949
full_data_dict = dataset.full_data_dict
5050

51-
target_category_column = (
52-
self.spec.target_category_columns[0]
53-
if self.spec.target_category_columns is not None
54-
else None
55-
)
56-
57-
anomaly_output = AnomalyOutput()
51+
anomaly_output = AnomalyOutput(date_column=date_column)
5852

5953
# Iterate over the full_data_dict items
6054
for target, df in full_data_dict.items():
@@ -70,6 +64,7 @@ def _build_model(self) -> AnomalyOutput:
7064
columns={score.columns.values[0]: OutputColumns.SCORE_COL},
7165
inplace=True,
7266
)
67+
score = 1-score
7368
score = score.reset_index(drop=False)
7469

7570
col = anomaly.columns.values[0]

0 commit comments

Comments
 (0)