PySATL · Mmorgenn · Oct 29, 2025 · Oct 29, 2025 · Oct 30, 2025 · Nov 1, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -23,6 +23,10 @@ scikit-learn = ">=1.6.1,<2.0.0"
 scipy = ">=1.15.2,<2.0.0"
 seaborn = ">=0.13.2,<0.14.0"
 scikit-fuzzy = "^0.5.0"
+kneed = "^0.8.5"
+pywavelets = "^1.9.0"
+gdown = "^5.2.0"
+xgboost = "^3.1.1"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.4"

diff --git a/rework_pysatl_mpest/preprocessing/components_family/__init__.py b/rework_pysatl_mpest/preprocessing/components_family/__init__.py
@@ -0,0 +1,37 @@
+"""
+components_family module for evaluating the family of components of a mixture
+
+This module provides a ready-made model for determining the most likely families of mixture components.
+The choice of a mixture component family is important when working with mixtures of distributions
+
+**Purpose**
+
+components_family module helps to speed up the search for the most suitable mixture component configuration
+by narrowing down the search to a few options
+
+**Usage Example**
+
+.. code-block:: python
+
+    >>> import numpy as np
+    >>> from rework_pysatl_mpest.preprocessing.components_family import ComponentsFamily
+    >>> from rework_pysatl_mpest.preprocessing.components_family import XGBBaseModel
+
+    >>> # Create an evenly spaced sample
+    >>> X = np.linspace(-10, 10, 200)
+
+    >>> # Determine 5 possible configurations using XGBBaseModel
+    >>> model = ComponentsFamily(XGBBaseModel, top_k=5)
+    >>> configurations = model.predict(X)
+
+    >>> print(f"Best 5 configurations: {configurations}")
+    >>> print(f"Best configuration: {configurations[0]}")
+"""
+
+__author__ = "Mark Dubrovchenko"
+__copyright__ = "Copyright (c) 2025 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from rework_pysatl_mpest.preprocessing.components_family.components_family import ComponentsFamily
+from rework_pysatl_mpest.preprocessing.components_family.mixture_classifiers import XGBBaseModel
+
+__all__ = ["ComponentsFamily", "XGBBaseModel"]
diff --git a/rework_pysatl_mpest/preprocessing/components_family/classifier_criterions.py b/rework_pysatl_mpest/preprocessing/components_family/classifier_criterions.py
@@ -0,0 +1,73 @@
+"""Module which contains collector of a vector of criterions for mixture classifier"""
+
+__author__ = "Mark Dubrovchenko"
+__copyright__ = "Copyright (c) 2025 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+import warnings
+from math import ceil
+
+import numpy as np
+from rework_pysatl_mpest.preprocessing.components_family.criterions import base_criterions
+from rework_pysatl_mpest.preprocessing.components_family.criterions.abstract_criterion import (
+    AHistClassifierCriterion,
+    APeaksClassifierCriterion,
+    ASampleClassifierCriterion,
+)
+from scipy.stats import iqr
+
+
+class MixtureClassifierCriterions:
+    """
+    Collector that evaluates a list of criterions on a sample and returns them as a named feature vector
+
+    Parameters
+    ----------
+    :criterions: list[ASampleClassifierCriterion | APeaksClassifierCriterion | AHistClassifierCriterion]
+
+    — List of criterions for the mixture classifiers (``base_criterions`` when omitted)
+    """
+
+    def __init__(
+        self,
+        criterions: list[
+            ASampleClassifierCriterion | APeaksClassifierCriterion | AHistClassifierCriterion
+        ] = base_criterions,
+    ) -> None:
+        # The list is stored as passed (no copy) and is only iterated by this class.
+        self.criterions = criterions
+
+    @staticmethod
+    def _get_hist(X: np.ndarray) -> np.ndarray:
+        """
+        A function for constructing a histogram with constraints
+
+        The bin width is ``iqr(X) * n ** (-1 / 3)`` and the resulting number of bins is clamped
+        to the range [20, 150]. When the width is not positive the minimum number of bins is used.
+
+        Parameters
+        ----------
+        :X: np.ndarray — Sample Data
+
+        Returns
+        ----------
+        np.ndarray
+
+        — Density-normalised bin heights (the bin edges are discarded)
+        """
+        n = X.size
+        bmin = 20
+        bmax = 150
+
+        # NOTE(review): X is assumed to be non-empty; an empty sample fails on the power below.
+        h = 1 * iqr(X) * n ** (-1 / 3)
+        bins = ceil((X.max() - X.min()) / h) if h > 0 else bmin
+        nbins = max(bmin, min(bins, bmax))
+
+        return np.histogram(X, bins=nbins, density=True)[0]
+
+    @staticmethod
+    def _get_criterion(
+        X: np.ndarray,
+        hist: np.ndarray,
+        criterion: (ASampleClassifierCriterion | APeaksClassifierCriterion | AHistClassifierCriterion),
+    ) -> float:
+        """
+        Function for obtaining a single criterion based on a sample
+
+        Parameters
+        ----------
+        :X: np.ndarray    — Sample Data, passed to sample-based criterions
+        :hist: np.ndarray — Histogram of the sample, passed to every other criterion
+        :criterion:       — Criterion whose ``score`` method is evaluated
+
+        Returns
+        ----------
+        float
+
+        — Value returned by ``criterion.score``
+        """
+
+        # Silence warnings only while the criterion is being scored. A bare
+        # warnings.filterwarnings("ignore") would stay installed afterwards and
+        # hide every warning raised anywhere else in the host application.
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+
+            if isinstance(criterion, ASampleClassifierCriterion):
+                return criterion.score(X)
+
+            return criterion.score(hist)
+
+    def get_criterions(self, X: np.ndarray) -> dict[str, float]:
+        """
+        Function for evaluating a feature vector based on a sample
+
+        Parameters
+        ----------
+        :X: np.ndarray — Sample Data
+
+        Returns
+        ----------
+        dict[str, float]
+
+        — Mapping from criterion name to its score, in the order of ``self.criterions``
+        """
+
+        # The histogram is built once and shared by all histogram-based criterions.
+        hist = self._get_hist(X)
+        return {criterion.name: self._get_criterion(X, hist, criterion) for criterion in self.criterions}
diff --git a/rework_pysatl_mpest/preprocessing/components_family/classifier_models/__init__.py b/rework_pysatl_mpest/preprocessing/components_family/classifier_models/__init__.py
@@ -0,0 +1,14 @@
+"""Module which contains interface of the classifier model and supported classifier models"""
+
+__author__ = "Mark Dubrovchenko"
+__copyright__ = "Copyright (c) 2025 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from rework_pysatl_mpest.preprocessing.components_family.classifier_models.classifier_interface import (
+    IClassifier,
+)
+from rework_pysatl_mpest.preprocessing.components_family.classifier_models.classifier_models import (
+    XGBClassifier,
+)
+
+__all__ = ["IClassifier", "XGBClassifier"]
diff --git a/...rk_pysatl_mpest/preprocessing/components_family/classifier_models/classifier_interface.py b/...rk_pysatl_mpest/preprocessing/components_family/classifier_models/classifier_interface.py
@@ -0,0 +1,26 @@
+"""Module which contains interface of the classifier model"""
+
+__author__ = "Mark Dubrovchenko"
+__copyright__ = "Copyright (c) 2025 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+
+class IClassifier(ABC):
+    """
+    Interface that every classification model has to implement
+
+    A concrete model must expose the ``is_fitted`` property and implement ``load_model`` and ``predict``
+    """
+
+    @abstractmethod
+    def load_model(self, model_path: str) -> None:
+        """
+        Abstract method for loading a model
+
+        Parameters
+        ----------
+        :model_path: str — Path the model is loaded from
+        """
+
+    @abstractmethod
+    def predict(self, criterions: dict[str, float]) -> np.ndarray:
+        """
+        Abstract method for obtaining a model prediction
+
+        Parameters
+        ----------
+        :criterions: dict[str, float] — Criterion values keyed by criterion name
+
+        Returns
+        ----------
+        np.ndarray
+
+        — Prediction of the model
+        """
+
+    @property
+    @abstractmethod
+    def is_fitted(self) -> bool:
+        """Property telling whether the model has been trained"""
diff --git a/rework_pysatl_mpest/preprocessing/components_family/classifier_models/classifier_models.py b/rework_pysatl_mpest/preprocessing/components_family/classifier_models/classifier_models.py
@@ -0,0 +1,34 @@
+"""Module which contains all supported classifier models"""
+
+__author__ = "Mark Dubrovchenko"
+__copyright__ = "Copyright (c) 2025 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+import numpy as np
+import xgboost as xgb
+from rework_pysatl_mpest.preprocessing.components_family.classifier_models.classifier_interface import (
+    IClassifier,
+)
+
+
+class XGBClassifier(IClassifier):
+    """Classifier backed by an ``xgboost.Booster``"""
+
+    def __init__(self) -> None:
+        # An empty booster is created up front; it is considered fitted only after ``load_model``.
+        self._is_fitted: bool = False
+        self.model = xgb.Booster()
+
+    @property
+    def is_fitted(self) -> bool:
+        """Whether a model has been loaded into the booster"""
+        return self._is_fitted
+
+    def load_model(self, model_path: str) -> None:
+        """Load the booster from ``model_path`` and mark the classifier as fitted"""
+        self.model.load_model(model_path)
+        # Reached only when loading did not raise.
+        self._is_fitted = True
+
+    def predict(self, criterions: dict[str, float]) -> np.ndarray:
+        """
+        Run the booster on a single feature vector
+
+        Parameters
+        ----------
+        :criterions: dict[str, float] — Criterion values keyed by criterion name
+
+        Returns
+        ----------
+        np.ndarray
+
+        — Output of ``Booster.predict`` for the single row built from ``criterions``
+        """
+        # Names and values are taken in the same (insertion) order of the dictionary.
+        names = list(criterions)
+        row = list(criterions.values())
+        matrix = xgb.DMatrix([row], feature_names=names)
+
+        return self.model.predict(matrix)
diff --git a/rework_pysatl_mpest/preprocessing/components_family/components_family.py b/rework_pysatl_mpest/preprocessing/components_family/components_family.py
@@ -0,0 +1,91 @@
+"""Module which contains method for initial estimation of mixture components family based on mixture classifier"""
+
+__author__ = "Mark Dubrovchenko"
+__copyright__ = "Copyright (c) 2025 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+import numpy as np
+from rework_pysatl_mpest.distributions import ContinuousDistribution
+from rework_pysatl_mpest.preprocessing.components_family.mixture_classifiers.mixture_classifier import (
+    MixtureClassifierModel,
+)
+from rework_pysatl_mpest.preprocessing.components_number.abstract_estimator import AComponentsNumber
+
+
+class ComponentsFamily:
+    """
+    ComponentsFamily
+
+    Selects the most probable mixture configurations predicted by a mixture classifier
+
+    Parameters
+    ----------
+    :recognition_model: MixtureClassifierModel    — Mixture Classifier Model
+    :top_k: int                                   — Top k most likely mixtures
+    :components_number: AComponentsNumber | None  — Method for estimating number of components
+    :state: int | None                            — Seed for the global NumPy generator (random generation
+                                                    for some criterions); None leaves the generator untouched
+    """
+
+    def __init__(
+        self,
+        recognition_model: MixtureClassifierModel,
+        top_k: int,
+        components_number: AComponentsNumber | None = None,
+        state: int | None = None,
+    ) -> None:
+        self.model = recognition_model
+        self.top_k = top_k
+        self.components_number = components_number
+        self.state = state
+
+    def predict(self, X: np.ndarray, k: int | list[int] | None = None) -> list[list[ContinuousDistribution]]:
+        """
+        Function for evaluating the top k most probable configurations
+
+        Parameters
+        ----------
+        :X: np.ndarray             — Sample Data
+        :k: int | list[int] | None — The set number of components of the mixture
+
+        k is a specific number, or a list of admissible numbers, or None
+        (to determine the number of components using a specified method,
+        or if no method is specified, to use the entire range from 1 to 10 components)
+
+        Returns
+        ----------
+        list[list[ContinuousDistribution]]
+
+        — List of at most top_k mixture configurations, ordered from the most to the least probable,
+        using distribution classes for further work with the mixture
+        """
+
+        def get_components_n() -> list[int]:
+            """Function that defines the boundaries of the possible number of mixture components"""
+            upper_bound = 10
+
+            if isinstance(k, int):
+                return [k]
+
+            if isinstance(k, list):
+                return k
+
+            if isinstance(self.components_number, AComponentsNumber):
+                # Allow the estimate itself and its neighbours, kept inside [1, upper_bound].
+                comp_k = self.components_number.estimate(X)
+                return [max(comp_k - 1, 1), comp_k, min(comp_k + 1, upper_bound)]
+
+            return list(range(1, upper_bound + 1))
+
+        # Seed only on request: np.random.seed(None) would re-seed the global generator
+        # from OS entropy and silently discard a seed the caller has set earlier.
+        if self.state is not None:
+            np.random.seed(self.state)
+
+        n = get_components_n()
+        prob = self.model.predict(X)
+        result: list[list[ContinuousDistribution]] = []
+
+        # Walk the classes from the highest to the lowest predicted probability.
+        for i in np.argsort(prob)[::-1]:
+            if len(result) == self.top_k:
+                break
+
+            components = self.model.transform(i)
+            # Skip configurations whose number of components is not admissible.
+            if len(components) not in n:
+                continue
+
+            result.append(components)
+
+        return result