1+ import pandas as pd
2+ import numpy as np
3+ from typing import Tuple , Optional
4+ import os
5+
class NABLoader:
    """Loader for NAB (Numenta Anomaly Benchmark) datasets.

    Datasets are CSV files located directly inside ``data_dir``. Optional
    labels are looked up in ``<data_dir>/labels/<name>_labels.csv``.
    """

    def __init__(self, data_dir: str = "data"):
        """Remember the directory that holds the dataset CSV files.

        Args:
            data_dir: Directory containing the dataset files.
        """
        self.data_dir = data_dir

    def _label_path_candidates(self, dataset_name: str) -> list:
        """Return the label-file paths to probe for ``dataset_name``, in order."""
        labels_dir = os.path.join(self.data_dir, "labels")
        # The exact name is tried first so existing layouts keep working.
        names = [dataset_name]
        # Names returned by get_available_datasets() carry the ".csv" suffix;
        # also accept a labels file named after the bare stem.
        stem, ext = os.path.splitext(dataset_name)
        if stem and ext.lower() == ".csv":
            names.append(stem)
        return [os.path.join(labels_dir, f"{name}_labels.csv") for name in names]

    def load_dataset(self, dataset_name: str) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """Load a NAB dataset and return its features and labels.

        The first column of the dataset file is assumed to be a timestamp
        and is dropped; every remaining column is returned as a feature.
        When a labels file exists, its second column is returned as the
        labels (its first column is likewise assumed to be a timestamp).

        Args:
            dataset_name: File name of the dataset, relative to ``data_dir``.

        Returns:
            Tuple containing:
                - features: numpy array of shape (n_samples, n_features)
                - labels: numpy array of shape (n_samples,) or None if no
                  labels file was found

        Raises:
            FileNotFoundError: If the dataset file does not exist (or is not
                a regular file) inside ``data_dir``.
        """
        dataset_path = os.path.join(self.data_dir, dataset_name)

        # isfile() rather than exists(): a directory of that name is not a
        # dataset and would only fail later inside pandas.
        if not os.path.isfile(dataset_path):
            raise FileNotFoundError(
                f"Dataset {dataset_name} not found in {self.data_dir}"
            )

        df = pd.read_csv(dataset_path)

        # Drop the leading (timestamp) column; keep everything else.
        features = df.iloc[:, 1:].to_numpy()

        # Labels are optional and live in a separate file.
        labels = None
        for labels_path in self._label_path_candidates(dataset_name):
            if os.path.isfile(labels_path):
                labels_df = pd.read_csv(labels_path)
                labels = labels_df.iloc[:, 1].to_numpy()
                break

        return features, labels

    def get_available_datasets(self) -> list:
        """Get the sorted list of dataset file names in the data directory.

        Only regular files whose name ends with ``.csv`` are reported;
        sub-directories are skipped. Sorting makes the result independent
        of the order in which the operating system lists the directory.

        Raises:
            FileNotFoundError: If ``data_dir`` does not exist (propagated
                from ``os.listdir``).
        """
        return sorted(
            entry
            for entry in os.listdir(self.data_dir)
            if entry.endswith(".csv")
            and os.path.isfile(os.path.join(self.data_dir, entry))
        )
0 commit comments