project-delphi
diff --git a/‎.env.example‎
Lines changed: 13 additions & 0 deletions b/‎.env.example‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 52 additions & 0 deletions b/‎.gitignore‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 78 additions & 0 deletions b/‎README.md‎
Lines changed: 78 additions & 0 deletions
diff --git a/‎anomaly_detection/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎anomaly_detection/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎anomaly_detection/api/main.py‎
Lines changed: 58 additions & 0 deletions b/‎anomaly_detection/api/main.py‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎anomaly_detection/data/nab_loader.py‎
Lines changed: 48 additions & 0 deletions b/‎anomaly_detection/data/nab_loader.py‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎anomaly_detection/models/base.py‎
Lines changed: 34 additions & 0 deletions b/‎anomaly_detection/models/base.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎anomaly_detection/models/factory.py‎
Lines changed: 37 additions & 0 deletions b/‎anomaly_detection/models/factory.py‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎anomaly_detection/models/isolation_forest.py‎
Lines changed: 26 additions & 0 deletions b/‎anomaly_detection/models/isolation_forest.py‎
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,13 @@
+# AWS credentials (required for Random Cut Forest)
+AWS_ACCESS_KEY_ID=your_access_key_id
+AWS_SECRET_ACCESS_KEY=your_secret_access_key
+AWS_REGION=us-west-2
+
+# API settings
+API_HOST=0.0.0.0
+API_PORT=8000
+DEBUG=True
+
+# Model settings
+DEFAULT_MODEL=isolation_forest
+DEFAULT_CONTAMINATION=0.1
@@ -0,0 +1,52 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual Environment
+.venv/
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Environment variables
+.env
+
+# Data
+data/*.csv
+!data/synthetic_data.csv
+
+# Logs
+*.log
+
+# Coverage
+.coverage
+htmlcov/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pytest
+.pytest_cache/
@@ -0,0 +1,78 @@
+# Anomaly Detection Service
+
+A Python-based service for detecting anomalies in time series data using Random Cut Forest and Isolation Forest algorithms.
+
+## Features
+
+- REST API endpoints for training and inference
+- Support for multiple anomaly detection algorithms:
+  - Random Cut Forest (AWS)
+  - Isolation Forest
+- Integration with NAB (Numenta Anomaly Benchmark) datasets
+- Easy-to-use training and prediction interfaces
+
+## Project Structure
+
+```text
+anomaly_detection/
+├── api/           # FastAPI endpoints
+├── core/          # Core anomaly detection logic
+├── data/          # Data processing and loading
+├── models/        # Model implementations
+├── utils/         # Utility functions
+└── tests/         # Test suite
+```
+
+## Setup
+
+1. Create a virtual environment:
+
+```bash
+python -m venv ./.venv
+source ./.venv/bin/activate  # On Windows: .venv\Scripts\activate
+```
+
+2. Install dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+3. Set up environment variables:
+
+```bash
+cp .env.example .env
+# Edit .env with your AWS credentials if using RCF
+```
+
+## Usage
+
+1. Start the API server:
+
+```bash
+uvicorn anomaly_detection.api.main:app --reload
+```
+
+2. Train a model:
+
+```bash
+curl -X POST "http://localhost:8000/train" -H "Content-Type: application/json" -d '{"algorithm": "isolation_forest", "data_path": "path/to/data.csv"}'
+```
+
+3. Make predictions:
+
+```bash
+curl -X POST "http://localhost:8000/predict" -H "Content-Type: application/json" -d '{"algorithm": "isolation_forest", "data": [1.2, 2.3, 3.4]}'
+```
+
+## Testing
+
+Run tests with coverage:
+
+```bash
+pytest --cov=anomaly_detection tests/
+```
+
+## License
+
+MIT
@@ -0,0 +1,8 @@
+"""
+Anomaly Detection Service
+
+A Python-based service for detecting anomalies in time series data using
+Random Cut Forest and Isolation Forest algorithms.
+"""
+
+# Version string of the anomaly_detection package.
+__version__ = "0.1.0"
@@ -0,0 +1,58 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+import numpy as np
+
+app = FastAPI(
+    title="Anomaly Detection Service",
+    description="A service for detecting anomalies in time series data",
+    version="1.0.0"
+)
+
+class TrainingRequest(BaseModel):
+    """Request body for the POST /train endpoint."""
+
+    # Name of the algorithm to train (the README example uses "isolation_forest").
+    algorithm: str
+    # Path to the training data (the README example passes a CSV path).
+    data_path: str
+    # Optional free-form parameters; the visible handler does not read them yet.
+    parameters: Optional[dict] = None
+
+class PredictionRequest(BaseModel):
+    """Request body for the POST /predict endpoint."""
+
+    # Name of the algorithm to predict with (e.g. "isolation_forest" in the README).
+    algorithm: str
+    # Values to score, sent as a flat list of floats.
+    data: List[float]
+    # Optional model identifier; the visible handler does not read it yet.
+    # NOTE(review): under Pydantic v2, field names starting with "model_" clash
+    # with the protected "model_" namespace and emit a warning — confirm the
+    # Pydantic version in use.
+    model_id: Optional[str] = None
+
+class PredictionResponse(BaseModel):
+    """Response body returned by the POST /predict endpoint."""
+
+    # Whether the input was judged anomalous.
+    is_anomaly: bool
+    # Anomaly score reported for the input.
+    score: float
+    # Threshold reported alongside the score.
+    threshold: float
+
+@app.post("/train")
+async def train_model(request: TrainingRequest):
+    """
+    Train an anomaly detection model using the specified algorithm and data.
+    """
+    try:
+        # TODO: Implement training logic
+        return {"status": "success", "model_id": "model_123"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/predict")
+async def predict(request: PredictionRequest) -> PredictionResponse:
+    """
+    Make predictions using a trained anomaly detection model.
+    """
+    try:
+        # TODO: Implement prediction logic
+        return PredictionResponse(
+            is_anomaly=False,
+            score=0.5,
+            threshold=0.7
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+async def health_check():
+    """
+    Health check endpoint.
+    """
+    return {"status": "healthy"} 
@@ -0,0 +1,48 @@
+import pandas as pd
+import numpy as np
+from typing import Tuple, Optional
+import os
+
+class NABLoader:
+    """Loader for NAB (Numenta Anomaly Benchmark) datasets."""
+    
+    def __init__(self, data_dir: str = "data"):
+        self.data_dir = data_dir
+    
+    def load_dataset(self, dataset_name: str) -> Tuple[np.ndarray, Optional[np.ndarray]]:
+        """
+        Load a NAB dataset and return features and labels.
+        
+        Args:
+            dataset_name: Name of the dataset to load
+            
+        Returns:
+            Tuple containing:
+            - features: numpy array of shape (n_samples, n_features)
+            - labels: numpy array of shape (n_samples,) or None if no labels
+        """
+        # Construct path to dataset
+        dataset_path = os.path.join(self.data_dir, dataset_name)
+        
+        if not os.path.exists(dataset_path):
+            raise FileNotFoundError(f"Dataset {dataset_name} not found in {self.data_dir}")
+        
+        # Load data
+        df = pd.read_csv(dataset_path)
+        
+        # Extract features (assuming first column is timestamp)
+        features = df.iloc[:, 1:].values
+        
+        # Check if labels exist (they might be in a separate file)
+        labels_path = os.path.join(self.data_dir, "labels", f"{dataset_name}_labels.csv")
+        labels = None
+        
+        if os.path.exists(labels_path):
+            labels_df = pd.read_csv(labels_path)
+            labels = labels_df.iloc[:, 1].values
+        
+        return features, labels
+    
+    def get_available_datasets(self) -> list:
+        """Get list of available datasets in the data directory."""
+        return [f for f in os.listdir(self.data_dir) if f.endswith('.csv')] 
@@ -0,0 +1,34 @@
+from abc import ABC, abstractmethod
+import numpy as np
+from typing import Dict, Any, Optional
+
+class AnomalyDetector(ABC):
+    """Base class for anomaly detection models."""
+    
+    def __init__(self, **kwargs):
+        self.model = None
+        self.threshold = None
+        self.parameters = kwargs
+    
+    @abstractmethod
+    def fit(self, X: np.ndarray) -> None:
+        """Fit the model to the training data."""
+        pass
+    
+    @abstractmethod
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        """Predict anomaly scores for the input data."""
+        pass
+    
+    def is_anomaly(self, X: np.ndarray) -> np.ndarray:
+        """Determine if samples are anomalies based on the threshold."""
+        scores = self.predict(X)
+        return scores > self.threshold
+    
+    def set_threshold(self, threshold: float) -> None:
+        """Set the anomaly detection threshold."""
+        self.threshold = threshold
+    
+    def get_parameters(self) -> Dict[str, Any]:
+        """Get the model parameters."""
+        return self.parameters 
@@ -0,0 +1,37 @@
+from typing import Dict, Type
+from .base import AnomalyDetector
+from .isolation_forest import IsolationForestDetector
+from .random_cut_forest import RandomCutForestDetector
+
+class ModelFactory:
+    """Factory class for creating anomaly detection models."""
+    
+    _models: Dict[str, Type[AnomalyDetector]] = {
+        "isolation_forest": IsolationForestDetector,
+        "random_cut_forest": RandomCutForestDetector
+    }
+    
+    @classmethod
+    def create_model(cls, model_type: str, **kwargs) -> AnomalyDetector:
+        """
+        Create an anomaly detection model.
+        
+        Args:
+            model_type: Type of model to create
+            **kwargs: Additional arguments to pass to the model constructor
+            
+        Returns:
+            An instance of the requested anomaly detection model
+            
+        Raises:
+            ValueError: If the requested model type is not supported
+        """
+        if model_type not in cls._models:
+            raise ValueError(f"Unsupported model type: {model_type}")
+        
+        return cls._models[model_type](**kwargs)
+    
+    @classmethod
+    def get_supported_models(cls) -> list:
+        """Get list of supported model types."""
+        return list(cls._models.keys()) 
@@ -0,0 +1,26 @@
+from sklearn.ensemble import IsolationForest
+import numpy as np
+from .base import AnomalyDetector
+
+class IsolationForestDetector(AnomalyDetector):
+    """Isolation Forest based anomaly detector."""
+    
+    def __init__(self, contamination: float = 0.1, **kwargs):
+        super().__init__(**kwargs)
+        self.contamination = contamination
+        self.model = IsolationForest(
+            contamination=contamination,
+            random_state=42,
+            **kwargs
+        )
+    
+    def fit(self, X: np.ndarray) -> None:
+        """Fit the Isolation Forest model."""
+        self.model.fit(X)
+        # Set threshold based on contamination
+        scores = self.model.score_samples(X)
+        self.threshold = np.percentile(scores, 100 * self.contamination)
+    
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        """Predict anomaly scores."""
+        return -self.model.score_samples(X)  # Convert to positive scores