@@ -30,32 +30,62 @@ def _build_model(self) -> AnomalyOutput:
3030
3131 model_kwargs = self .spec .model_kwargs
3232 # map the output as per anomaly dataset class, 1: outlier, 0: inlier
33- self .outlier_map = {1 : 0 , - 1 : 1 }
33+ # self.outlier_map = {1: 0, -1: 1}
3434
3535 anomaly_output = AnomalyOutput (date_column = "index" )
36- #TODO: PDB
37- import pdb
36+ # TODO: PDB
3837
39- pdb .set_trace ()
38+ # Set tree parameters
39+ num_trees = model_kwargs .get ("num_trees" , 200 )
40+ shingle_size = model_kwargs .get ("shingle_size" , 1 )
41+ tree_size = model_kwargs .get ("tree_size" , 1000 )
4042
4143 for target , df in self .datasets .full_data_dict .items ():
42- model = RCTree (** model_kwargs )
43- model .fit (df )
44- y_pred = model .predict (df )
45- y_pred = np .vectorize (self .outlier_map .get )(y_pred )
44+ df_values = df [self .spec .target_column ].astype (float ).values
45+ points = np .vstack (list (rrcf .shingle (df_values , size = 4 )))
4646
47- scores = model .score_samples (df )
47+ sample_size_range = (1 , 6 )
48+ n = points .shape [0 ]
49+ avg_codisp = pd .Series (0.0 , index = np .arange (n ))
50+ index = np .zeros (n )
4851
49- index_col = df .columns [0 ]
52+ forest = []
53+ while len (forest ) < num_trees :
54+ ixs = np .random .choice (n , size = sample_size_range , replace = False )
55+ trees = [rrcf .RCTree (points [ix ], index_labels = ix ) for ix in ixs ]
56+ forest .extend (trees )
57+ print (len (forest ))
5058
51- anomaly = pd .DataFrame (
52- {index_col : df [index_col ], OutputColumns .ANOMALY_COL : y_pred }
53- ).reset_index (drop = True )
54- score = pd .DataFrame (
55- {"index" : df [index_col ], OutputColumns .SCORE_COL : scores }
56- ).reset_index (drop = True )
59+ for tree in forest :
60+ codisp = pd .Series ({leaf : tree .codisp (leaf ) for leaf in tree .leaves })
61+ avg_codisp [codisp .index ] += codisp
62+ np .add .at (index , codisp .index .values , 1 )
5763
58- anomaly_output .add_output (target , anomaly , score )
64+ avg_codisp /= index
65+ avg_codisp .index = df .iloc [(4 - 1 ) :].index
66+ avg_codisp = (avg_codisp - avg_codisp .min ()) / (
67+ avg_codisp .max () - avg_codisp .min ()
68+ )
69+
70+ y_pred = (avg_codisp > np .percentile (avg_codisp , 95 )).astype (int )
71+
72+ import pdb
73+
74+ pdb .set_trace ()
75+ print ("Done" )
76+
77+ # scores = model.score_samples(df)
78+
79+ # index_col = df.columns[0]
80+
81+ # anomaly = pd.DataFrame(
82+ # {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
83+ # ).reset_index(drop=True)
84+ # score = pd.DataFrame(
85+ # {"index": df[index_col], OutputColumns.SCORE_COL: scores}
86+ # ).reset_index(drop=True)
87+
88+ # anomaly_output.add_output(target, anomaly, score)
5989
6090 return anomaly_output
6191
0 commit comments