From 21daa024141a158a8a73bb0b582e17e0ff636c8c Mon Sep 17 00:00:00 2001 From: xImoZA Date: Wed, 5 Nov 2025 01:35:40 +0300 Subject: [PATCH 1/7] feat(core, distributions): switch to generics --- rework_pysatl_mpest/core/mixture.py | 119 ++++++----- rework_pysatl_mpest/core/parameter.py | 39 ++-- rework_pysatl_mpest/distributions/beta.py | 96 +++++---- rework_pysatl_mpest/distributions/cauchy.py | 68 +++--- .../distributions/continuous_dist.py | 96 +++++++-- .../distributions/exponential.py | 57 ++--- rework_pysatl_mpest/distributions/normal.py | 53 +++-- rework_pysatl_mpest/distributions/pareto.py | 66 +++--- rework_pysatl_mpest/distributions/uniform.py | 51 +++-- rework_pysatl_mpest/distributions/weibull.py | 76 ++++--- .../initializers/clusterize_initializer.py | 2 +- rework_pysatl_mpest/typings.py | 11 + rework_tests/unit/core/test_mixture.py | 102 ++++++++- rework_tests/unit/core/test_parameter.py | 60 +++++- rework_tests/unit/distributions/test_beta.py | 38 +++- .../unit/distributions/test_cauchy.py | 33 ++- .../test_continuous_distribution.py | 199 ++++++++++++++++-- .../unit/distributions/test_exponential.py | 37 +++- .../unit/distributions/test_normal.py | 33 ++- .../unit/distributions/test_pareto.py | 35 ++- .../unit/distributions/test_uniform.py | 33 ++- .../unit/distributions/test_weibull.py | 34 ++- .../iterative/pruners/test_prior_pruner.py | 4 +- 23 files changed, 1033 insertions(+), 309 deletions(-) create mode 100644 rework_pysatl_mpest/typings.py diff --git a/rework_pysatl_mpest/core/mixture.py b/rework_pysatl_mpest/core/mixture.py index 9f65669f..26e2c709 100644 --- a/rework_pysatl_mpest/core/mixture.py +++ b/rework_pysatl_mpest/core/mixture.py @@ -9,22 +9,25 @@ from collections.abc import Iterator, Sequence from copy import copy -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Generic, Optional import numpy as np -from numpy import float64 from numpy.typing import ArrayLike, NDArray from scipy.special import logsumexp, 
softmax +from ..typings import DType + if TYPE_CHECKING: from ..distributions import ContinuousDistribution -class MixtureModel: +class MixtureModel(Generic[DType]): """Represents a finite mixture of continuous probability distributions. This class encapsulates a collection of distribution components and their - corresponding weights. + corresponding weights. All components within the mixture are automatically + converted to the specified `dtype` of the MixtureModel, ensuring + computational consistency. Instances of this class can be compared for equality (``==``) and inequality (``!=``). Two models are considered equal if they have the @@ -39,17 +42,21 @@ class MixtureModel: An array of initial weights for the components. The weights must be positive and sum to 1. If None, components are assigned equal weights. Defaults to None. + dtype : type[DType], optional + The numpy data type used for internal calculations and + output arrays (e.g., `np.float32` or `np.float64`). + Defaults to `np.float64`. Attributes ---------- - components : tuple[ContinuousDistribution] + components : tuple[ContinuousDistribution[DType], ...] A tuple of the distribution objects that form the mixture. n_components : int The number of components in the mixture. - weights : NDArray[np.float64] + weights : NDArray[DType] A NumPy array of the normalized weights for each component. The sum of weights is always 1. - log_weights : NDArray[np.float64] + log_weights : NDArray[DType] A NumPy array of the natural logarithm of the component weights. 
Raises @@ -72,31 +79,40 @@ class MixtureModel: generate """ - def __init__(self, components: Sequence["ContinuousDistribution"], weights: Optional[ArrayLike] = None): + _dtype: type[DType] + + def __init__( + self, + components: Sequence["ContinuousDistribution"], + weights: Optional[ArrayLike] = None, + dtype: type[DType] = np.float64, # type: ignore[assignment] + ): n_components = len(components) if n_components == 0: raise ValueError("List of components cannot be an empty") + self._dtype = dtype + if weights is None: - weights = np.full(n_components, 1.0 / n_components) + weights = np.full(n_components, 1.0 / n_components, dtype=self.dtype) else: - weights = np.asarray(weights, dtype=float64) + weights = np.asarray(weights, dtype=self.dtype) self._validate_weights(n_components, weights) - self._components = list(components) - self._log_weights = np.log(weights + 1e-30) - self._cached_weights: Optional[NDArray[float64]] = None + self._components = [comp.astype(self.dtype) for comp in components] + self._log_weights = np.log(weights + self.dtype(1e-30)) + self._cached_weights: Optional[NDArray[DType]] = None - self._sorted_pairs_cache: Optional[list[tuple[ContinuousDistribution, float]]] = None + self._sorted_pairs_cache: Optional[list[tuple[ContinuousDistribution[DType], DType]]] = None - def _validate_weights(self, n_components: int, weights: NDArray[float64]): + def _validate_weights(self, n_components: int, weights: NDArray[DType]): """Validates the component weights. Parameters ---------- n_components : int The expected number of components. - weights : NDArray[np.float64] + weights : NDArray[DType] The array of weights to validate. 
Raises @@ -112,9 +128,14 @@ def _validate_weights(self, n_components: int, weights: NDArray[float64]): if np.any(weights < 0): raise ValueError("Weights must be positive.") - if not np.isclose(np.sum(weights), 1.0): + if not np.isclose(np.sum(weights), self.dtype(1.0)): raise ValueError(f"Sum of the weights must be equal 1, but it equal {np.sum(weights)}.") + @property + def dtype(self) -> type[DType]: + """type[DType]: The numpy data type of the mixture's outputs.""" + return self._dtype + @property def n_components(self): """int: The number of components in the mixture model.""" @@ -123,13 +144,13 @@ def n_components(self): @property def components(self): - """tuple[ContinuousDistribution, ...]: The components of the mixture.""" + """tuple[ContinuousDistribution[DType], ...]: The components of the mixture.""" return tuple(self._components) @property - def weights(self) -> NDArray[float64]: - """NDArray[np.float64]: The normalized weights of the components. + def weights(self) -> NDArray[DType]: + """NDArray[DType]: The normalized weights of the components. The weights are computed from the log-weights using the softmax function and cached for efficiency. @@ -141,8 +162,8 @@ def weights(self) -> NDArray[float64]: return self._cached_weights # type: ignore @property - def log_weights(self) -> NDArray[float64]: - """NDArray[np.float64]: The logarithm of the component weights.""" + def log_weights(self) -> NDArray[DType]: + """NDArray[DType]: The logarithm of the component weights.""" return self._log_weights @@ -162,11 +183,11 @@ def log_weights(self, new_log_weights: ArrayLike): number of components. 
""" - new_log_weights = np.asarray(new_log_weights, dtype=float64) + new_log_weights = np.asarray(new_log_weights, dtype=self.dtype) if len(new_log_weights) != self.n_components: raise ValueError("The length of the new logit vector does not match the number of components.") - self._log_weights = np.asarray(new_log_weights, dtype=float) + self._log_weights = new_log_weights self._cached_weights = None self._sorted_pairs_cache = None @@ -192,11 +213,13 @@ def add_component(self, component: "ContinuousDistribution", weight: float): if not (0 < weight < 1): raise ValueError("The weight of the new component must be in the range (0, 1).") - self._log_weights += np.log(1 - weight) - new_log_weight = np.log(weight) + d_weight = self.dtype(weight) + self._log_weights += np.log(self.dtype(1.0) - d_weight) + new_log_weight = np.log(d_weight) self._log_weights = np.append(self._log_weights, new_log_weight) - self._components.append(component) + new_component = component.astype(self.dtype) + self._components.append(new_component) self._cached_weights = None self._sorted_pairs_cache = None @@ -231,7 +254,7 @@ def remove_component(self, component_idx: int): self._cached_weights = None self._sorted_pairs_cache = None - def pdf(self, X: ArrayLike) -> NDArray[float64]: + def pdf(self, X: ArrayLike) -> NDArray[DType]: """Probability Density Function of the mixture. The PDF is computed as the weighted sum of the PDFs of its @@ -244,15 +267,15 @@ def pdf(self, X: ArrayLike) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) component_pdfs = np.array([comp.pdf(X) for comp in self.components]) return np.asarray(np.dot(self.weights, component_pdfs)) - def lpdf(self, X: ArrayLike) -> NDArray[float64]: + def lpdf(self, X: ArrayLike) -> NDArray[DType]: """Logarithms of the Probability Density Function. 
Parameters @@ -262,17 +285,17 @@ def lpdf(self, X: ArrayLike) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ - X = np.atleast_1d(X) + X = np.atleast_1d(X).astype(self.dtype) component_lpdfs = np.array([comp.lpdf(X) for comp in self.components]) log_weights = self.log_weights log_terms = log_weights[:, np.newaxis] + component_lpdfs return logsumexp(log_terms, axis=0) # type: ignore - def loglikelihood(self, X: ArrayLike) -> float: + def loglikelihood(self, X: ArrayLike) -> DType: """Log-likelihood of the complete data :attr:`X`. The log-likelihood is the sum of the log-PDF values for all data @@ -289,10 +312,10 @@ def loglikelihood(self, X: ArrayLike) -> float: The total log-likelihood value. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) return np.sum(self.lpdf(X)) - def generate(self, size: int) -> NDArray[float64]: + def generate(self, size: int) -> NDArray[DType]: """Generates random samples from the mixture model. First, a component is chosen based on the mixture weights. Then, a @@ -306,13 +329,13 @@ def generate(self, size: int) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. Returns an empty array if :attr:`size` is not positive. """ if size == 0: - return np.array([]) + return np.array([], dtype=self.dtype) component_choices = np.random.choice(self.n_components, size=size, p=self.weights) @@ -324,7 +347,7 @@ def generate(self, size: int) -> NDArray[float64]: np.random.shuffle(samples) return samples - def __getitem__(self, key: int) -> "ContinuousDistribution": + def __getitem__(self, key: int) -> "ContinuousDistribution[DType]": """Retrieves components by index. 
Parameters @@ -334,13 +357,13 @@ def __getitem__(self, key: int) -> "ContinuousDistribution": Returns ------- - ContinuousDistribution + ContinuousDistribution[DType] A single component of the mixture """ return self.components[key] - def __iter__(self) -> Iterator["ContinuousDistribution"]: + def __iter__(self) -> Iterator["ContinuousDistribution[DType]"]: """Returns an iterator over the mixture components. This allows the `MixtureModel` instance to be used directly in @@ -348,26 +371,26 @@ def __iter__(self) -> Iterator["ContinuousDistribution"]: Yields ------ - Iterator[ContinuousDistribution] + Iterator[ContinuousDistribution[DType] An iterator that yields the components of the mixture model. """ return iter(self.components) - def __copy__(self) -> "MixtureModel": + def __copy__(self) -> "MixtureModel[DType]": """Creates a copy of the mixture model instance. Returns ------- - MixtureModel + MixtureModel[DType] A new instance of the distribution, identical to the original. """ copied_components = [copy(component) for component in self._components] - new_mixture = MixtureModel(components=copied_components, weights=self.weights.copy()) + new_mixture = MixtureModel(components=copied_components, weights=self.weights.copy(), dtype=self.dtype) return new_mixture - def _get_sorted_pairs(self, for_hashing: bool = False) -> list[tuple["ContinuousDistribution", float]]: + def _get_sorted_pairs(self, for_hashing: bool = False) -> list[tuple["ContinuousDistribution[DType]", DType]]: """Internal helper to get component-weight pairs, sorted by component hash.""" if self._sorted_pairs_cache is None or for_hashing: @@ -401,7 +424,7 @@ def __eq__(self, other: object) -> bool: if not isinstance(other, MixtureModel): return NotImplemented - if self.n_components != other.n_components: + if self.dtype != other.dtype or self.n_components != other.n_components: return False self_pairs = self._get_sorted_pairs() @@ -425,4 +448,4 @@ def __hash__(self) -> int: """ sorted_pairs_for_hash = 
self._get_sorted_pairs(for_hashing=True) - return hash(tuple(sorted_pairs_for_hash)) + return hash((self.dtype, tuple(sorted_pairs_for_hash))) diff --git a/rework_pysatl_mpest/core/parameter.py b/rework_pysatl_mpest/core/parameter.py index d503bc94..6a14771a 100644 --- a/rework_pysatl_mpest/core/parameter.py +++ b/rework_pysatl_mpest/core/parameter.py @@ -5,12 +5,18 @@ It allows you to set invariants for parameter values and handle assignment errors, as well as to fix parameters from changes.""" -__author__ = "Danil Totmyanin" +__author__ = "Danil Totmyanin, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" -from typing import Callable, Union, overload +from typing import TYPE_CHECKING, Callable, Optional, Union, overload + +import numpy as np + +if TYPE_CHECKING: + from ..distributions.continuous_dist import ContinuousDistribution +from ..typings import DType class Parameter: @@ -64,7 +70,7 @@ def __init__( self.invariant = invariant self.error_message = error_message - def __set_name__(self, owner: type[object], name: str): + def __set_name__(self, owner: type["ContinuousDistribution[DType]"], name: str): """Sets the name for the public and private attributes. This method is automatically called when a descriptor instance is created @@ -73,7 +79,7 @@ def __set_name__(self, owner: type[object], name: str): Parameters ---------- - owner : type[object] + owner : type[ContinuousDistribution] The class that uses the descriptor. name : str The attribute name assigned to the descriptor instance. 
@@ -83,14 +89,16 @@ def __set_name__(self, owner: type[object], name: str): self.private_name = "_" + name @overload - def __get__(self, instance: None, owner: type[object]) -> "Parameter": + def __get__(self, instance: None, owner: type["ContinuousDistribution[DType]"]) -> "Parameter": """If access is via a class, return the descriptor object itself.""" @overload - def __get__(self, instance: object, owner: type[object]) -> float: + def __get__(self, instance: "ContinuousDistribution[DType]", owner: type["ContinuousDistribution[DType]"]) -> DType: """If access is via an object, return the value.""" - def __get__(self, instance: object | None, owner: type[object]) -> Union[float, "Parameter"]: + def __get__( + self, instance: Optional["ContinuousDistribution[DType]"], owner: type["ContinuousDistribution[DType]"] + ) -> Union[DType, "Parameter"]: """Returns the parameter value or the descriptor itself. If access is through an instance of the class, it returns the @@ -99,15 +107,15 @@ def __get__(self, instance: object | None, owner: type[object]) -> Union[float, Parameters ---------- - instance : object or None + instance : ContinuousDistribution, optional An instance of the owner class, or `None` if access is through the class. - owner : type[object] + owner : type[ContinuousDistribution] The owner class. Returns ------- - float or Parameter + DType or Parameter The value of the parameter or the descriptor itself. """ @@ -116,7 +124,7 @@ def __get__(self, instance: object | None, owner: type[object]) -> Union[float, return getattr(instance, self.private_name) - def __set__(self, instance: object, value: float): + def __set__(self, instance: "ContinuousDistribution[DType]", value: float): """Sets the parameter value after validation. 
Before setting a new value, it checks whether the parameter is @@ -124,7 +132,7 @@ def __set__(self, instance: object, value: float): Parameters ---------- - instance : object + instance : "ContinuousDistribution[DType]" An instance of the owner class. value : float The new value for the parameter. @@ -143,7 +151,10 @@ def __set__(self, instance: object, value: float): "This parameter is fixed." ) - if not self.invariant(value): + owner_dtype = getattr(instance, "dtype", np.float64) + d_value = owner_dtype(value) + + if not self.invariant(d_value): raise ValueError(f"Invalid value for '{self.public_name}': {self.error_message}") - setattr(instance, self.private_name, value) + setattr(instance, self.private_name, d_value) diff --git a/rework_pysatl_mpest/distributions/beta.py b/rework_pysatl_mpest/distributions/beta.py index ca19b51e..7c42d150 100644 --- a/rework_pysatl_mpest/distributions/beta.py +++ b/rework_pysatl_mpest/distributions/beta.py @@ -1,15 +1,16 @@ """Module providing four parametric beta distribution distribution class""" -__author__ = "Maksim Pastukhov" +__author__ = "Maksim Pastukhov, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" + import numpy as np -from numpy import float64 from scipy.special import digamma from scipy.stats import beta as beta_dist from ..core import Parameter +from ..typings import DType from .continuous_dist import ContinuousDistribution @@ -60,8 +61,15 @@ class Beta(ContinuousDistribution): left_border = Parameter() right_border = Parameter() - def __init__(self, alpha: float, beta: float, left_border: float, right_border: float): - super().__init__() + def __init__( + self, + alpha: float, + beta: float, + left_border: float, + right_border: float, + dtype: type[DType] = np.float64, # type: ignore[assignment] + ): + super().__init__(dtype=dtype) if left_border >= right_border: raise ValueError("Left border must be less than right border") self.alpha = alpha @@ 
-101,10 +109,11 @@ def pdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ + X = np.asarray(X, dtype=self.dtype) return np.exp(self.lpdf(X)) def ppf(self, P): @@ -128,14 +137,19 @@ def ppf(self, P): Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. """ - P = np.asarray(P, dtype=float64) + P = np.asarray(P, dtype=self.dtype) + dtype = self.dtype + return np.where( (P >= 0) & (P <= 1), - (self.left_border + (self.right_border - self.left_border) * beta_dist.ppf(P, self.alpha, self.beta)), - np.nan, + ( + self.left_border + + (self.right_border - self.left_border) * beta_dist.ppf(P, self.alpha, self.beta).astype(dtype) + ), + dtype(np.nan), ) def lpdf(self, X): @@ -163,17 +177,19 @@ def lpdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype Z = (X - self.left_border) / (self.right_border - self.left_border) - log_pdf_standard = beta_dist.logpdf(Z, self.alpha, self.beta) + log_pdf_standard = beta_dist.logpdf(Z, self.alpha, self.beta).astype(dtype) + result = log_pdf_standard - np.log(self.right_border - self.left_border) - return log_pdf_standard - np.log(self.right_border - self.left_border) + return np.atleast_1d(result) def _dlog_alpha(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`alpha` parameter. @@ -198,18 +214,20 @@ def _dlog_alpha(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`alpha` for each point in :attr:`X`. 
""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + in_bounds = (self.left_border < X) & (self.right_border >= X) return np.where( in_bounds, np.log(X - self.left_border) - np.log(self.right_border - self.left_border) - - (digamma(self.alpha) - digamma(self.alpha + self.beta)), - 0.0, + - (dtype(digamma(self.alpha)) - dtype(digamma(self.alpha + self.beta))), + dtype(0.0), ) def _dlog_beta(self, X): @@ -235,18 +253,20 @@ def _dlog_beta(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`beta` for each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + in_bounds = (self.left_border < X) & (self.right_border >= X) return np.where( in_bounds, np.log(self.right_border - X) - np.log(self.right_border - self.left_border) - - (digamma(self.beta) - digamma(self.alpha + self.beta)), - 0.0, + - (dtype(digamma(self.beta)) - dtype(digamma(self.alpha + self.beta))), + dtype(0.0), ) def _dlog_left_border(self, X): @@ -270,19 +290,21 @@ def _dlog_left_border(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`left_border` for each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + in_bounds = (self.left_border < X) & (self.right_border >= X) return np.where( in_bounds, ( - ((self.alpha + self.beta - 1) / (self.right_border - self.left_border)) - - ((self.alpha - 1) / (X - self.left_border)) + ((self.alpha + self.beta - dtype(1)) / (self.right_border - self.left_border)) + - ((self.alpha - dtype(1)) / (X - self.left_border)) ), - 0.0, + dtype(0.0), ) def _dlog_right_border(self, X): @@ -306,18 +328,20 @@ def _dlog_right_border(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`right_border` for each point in :attr:`X`. 
""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + in_bounds = (self.left_border < X) & (self.right_border >= X) return np.where( in_bounds, ( - ((self.beta - 1) / (self.right_border - X)) - - ((self.alpha + self.beta - 1) / (self.right_border - self.left_border)) + ((self.beta - dtype(1)) / (self.right_border - X)) + - ((self.alpha + self.beta - dtype(1)) / (self.right_border - self.left_border)) ), - 0.0, + dtype(0.0), ) def log_gradients(self, X): @@ -332,13 +356,13 @@ def log_gradients(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds to the sorted order of :attr:`self.params_to_optimize`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) gradient_calculators = { self.PARAM_ALPHA: self._dlog_alpha, @@ -350,7 +374,7 @@ def log_gradients(self, X): optimizable_params = sorted(list(self.params_to_optimize)) if not optimizable_params: - return np.empty((len(X), 0)) + return np.empty((len(X), 0), dtype=self.dtype) gradients = [gradient_calculators[param](X) for param in optimizable_params] @@ -366,7 +390,7 @@ def generate(self, size: int): Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. 
""" @@ -374,7 +398,7 @@ def generate(self, size: int): beta_dist.rvs( self.alpha, self.beta, loc=self.left_border, scale=self.right_border - self.left_border, size=size ), - dtype=float64, + dtype=self.dtype, ) def __repr__(self) -> str: diff --git a/rework_pysatl_mpest/distributions/cauchy.py b/rework_pysatl_mpest/distributions/cauchy.py index 00c39f5d..a0ebcbcf 100644 --- a/rework_pysatl_mpest/distributions/cauchy.py +++ b/rework_pysatl_mpest/distributions/cauchy.py @@ -1,18 +1,19 @@ """Module providing Cauchy distribution class""" -__author__ = "Maksim Pastukhov" +__author__ = "Maksim Pastukhov, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" + import numpy as np -from numpy import float64 from scipy.stats import cauchy from ..core import Parameter +from ..typings import DType from .continuous_dist import ContinuousDistribution -class Cauchy(ContinuousDistribution): +class Cauchy(ContinuousDistribution[DType]): """Class for the two-parameter cauchy distribution. Parameters @@ -49,8 +50,8 @@ class Cauchy(ContinuousDistribution): loc = Parameter() scale = Parameter(lambda x: x > 0.0, "Scale parameter should be positive") - def __init__(self, loc: float, scale: float): - super().__init__() + def __init__(self, loc: float, scale: float, dtype: type[DType] = np.float64): # type: ignore[assignment] + super().__init__(dtype=dtype) self.loc = loc self.scale = scale @@ -81,12 +82,14 @@ def pdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) - return 1.0 / (np.pi * self.scale * (1.0 + ((X - self.loc) / self.scale) ** 2)) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return dtype(1.0) / (dtype(np.pi) * self.scale * (dtype(1.0) + ((X - self.loc) / self.scale) ** 2)) def ppf(self, P): """Percent Point Function (PPF) or quantile function. 
@@ -107,18 +110,20 @@ def ppf(self, P): Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. """ - P = np.asarray(P, dtype=float64) + P = np.asarray(P, dtype=self.dtype) + dtype = self.dtype + return np.where( (P >= 0) & (P <= 1), np.where( (P == 0) | (P == 1), - np.where(P == 1, np.inf, -np.inf), - self.loc + self.scale * np.tan(np.pi * (P - 0.5)), + np.where(P == 1, dtype(np.inf), dtype(-np.inf)), + self.loc + self.scale * np.tan(dtype(np.pi) * (P - dtype(0.5))), ), - np.nan, + dtype(np.nan), ) def lpdf(self, X): @@ -140,11 +145,14 @@ def lpdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) - return np.log(1.0) - np.log(np.pi) - np.log(self.scale) - np.log(1.0 + ((X - self.loc) / self.scale) ** 2) + + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return np.log(dtype(1.0)) - np.log(dtype(np.pi)) - np.log(self.scale) - np.log(dtype(1.0) + ((X - self.loc) / self.scale) ** 2) def _dlog_loc(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`loc` parameter. @@ -166,12 +174,14 @@ def _dlog_loc(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`loc` for each point in ::attr`X`. """ - X = np.asarray(X, dtype=float64) - return (2 * X - 2 * self.loc) / (self.scale**2 + X**2 - 2 * self.loc * X + self.loc**2) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return (dtype(2) * X - dtype(2) * self.loc) / (self.scale**2 + X**2 - dtype(2) * self.loc * X + self.loc**2) def _dlog_scale(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`scale` parameter. @@ -193,12 +203,14 @@ def _dlog_scale(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`rate` for each point in :attr:`X`. 
""" - X = np.asarray(X, dtype=float64) - return (-(self.scale**2) + X**2 - 2 * self.loc * X + self.loc**2) / ( - self.scale**3 + self.scale * (X**2) - 2 * self.loc * self.scale * X + self.scale * self.loc**2 + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return (-(self.scale**2) + X**2 - dtype(2) * self.loc * X + self.loc**2) / ( + self.scale**3 + self.scale * (X**2) - dtype(2) * self.loc * self.scale * X + self.scale * self.loc**2 ) def log_gradients(self, X): @@ -213,13 +225,13 @@ def log_gradients(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds to the sorted order of :attr:`self.params_to_optimize`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) gradient_calculators = { self.PARAM_LOC: self._dlog_loc, @@ -229,7 +241,7 @@ def log_gradients(self, X): optimizable_params = sorted(list(self.params_to_optimize)) if not optimizable_params: - return np.empty((len(X), 0)) + return np.empty((len(X), 0), dtype=self.dtype) gradients = [gradient_calculators[param](X) for param in optimizable_params] @@ -245,11 +257,11 @@ def generate(self, size: int): Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. """ - return np.asarray(cauchy.rvs(loc=self.loc, scale=self.scale, size=size), dtype=float64) + return np.asarray(cauchy.rvs(loc=self.loc, scale=self.scale, size=size), dtype=self.dtype) def __repr__(self) -> str: """Returns a string representation of the object. 
diff --git a/rework_pysatl_mpest/distributions/continuous_dist.py b/rework_pysatl_mpest/distributions/continuous_dist.py index f0d4b105..6f0d19c0 100644 --- a/rework_pysatl_mpest/distributions/continuous_dist.py +++ b/rework_pysatl_mpest/distributions/continuous_dist.py @@ -8,12 +8,15 @@ from abc import ABC, abstractmethod from collections.abc import Sequence +from typing import Generic -from numpy import float64 +import numpy as np from numpy.typing import ArrayLike, NDArray +from ..typings import DType -class ContinuousDistribution(ABC): + +class ContinuousDistribution(ABC, Generic[DType]): """Abstract base class for continuous distributions. This class defines the basic mathematical functions of distributions @@ -48,6 +51,7 @@ class ContinuousDistribution(ABC): unfix_param get_params_vector set_params_from_vector + astype **Abstract methods** @@ -84,12 +88,23 @@ class ContinuousDistribution(ABC): """ - def __init__(self): - """The constructor must be called by all descendants for the - `fixed_params` attribute to be initialized. + _dtype: type[DType] + + def __init__(self, dtype: type[DType] = np.float64): # type: ignore[assignment] + """This constructor must be called by all descendants to ensure + proper initialization of common attributes like `fixed_params` + and `dtype`. + + Parameters + ---------- + dtype : Type[DType], optional + The numpy data type used for internal calculations and + output arrays (e.g., `np.float32` or `np.float64`). + Defaults to `np.float64`. """ self._fixed_params: set[str] = set() + self._dtype = dtype def fix_param(self, name: str): """Fixes a parameter, excluding it from optimization and further changes. @@ -123,7 +138,7 @@ def unfix_param(self, name: str): self._fixed_params.discard(name) - def get_params_vector(self, param_names: Sequence[str]) -> list[float]: + def get_params_vector(self, param_names: Sequence[str]) -> list[DType]: """Retrieves specified parameter values as a list. 
Parameters @@ -155,7 +170,10 @@ def set_params_from_vector(self, param_names: Sequence[str], vector: Sequence[fl Updates the distribution's parameters using values from the provided sequence. The order of values in the :attr:`vector` must correspond to the order - of names in :attr:`param_names`. + of names in :attr:`param_names`. This vector can contain + standard numerical types like `int` or `float`. Internally, each + value is automatically cast to the distribution's specific `dtype` + (e.g., `numpy.float32` or `numpy.float64`) Parameters ---------- @@ -179,7 +197,12 @@ def set_params_from_vector(self, param_names: Sequence[str], vector: Sequence[fl raise ValueError(f"Invalid parameter names provided: {invalid_params}") for name, value in zip(param_names, vector): - setattr(self, name, value) + setattr(self, name, self.dtype(value)) + + @property + def dtype(self) -> type[DType]: + """type[DType]: The numpy data type of the distribution's outputs.""" + return self._dtype @property @abstractmethod @@ -198,7 +221,7 @@ def params_to_optimize(self) -> set[str]: return self.params - self._fixed_params @abstractmethod - def pdf(self, X: ArrayLike) -> NDArray[float64]: + def pdf(self, X: ArrayLike) -> NDArray[DType]: """Probability Density Function. Parameters @@ -208,12 +231,12 @@ def pdf(self, X: ArrayLike) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ @abstractmethod - def ppf(self, P: ArrayLike) -> NDArray[float64]: + def ppf(self, P: ArrayLike) -> NDArray[DType]: """Percent Point Function (PPF) or quantile function. This is the inverse of the Cumulative Distribution Function (CDF). @@ -226,12 +249,12 @@ def ppf(self, P: ArrayLike) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. 
""" @abstractmethod - def lpdf(self, X: ArrayLike) -> NDArray[float64]: + def lpdf(self, X: ArrayLike) -> NDArray[DType]: """Logarithm of the Probability Density Function. Evaluating the log-PDF is often more numerically stable than @@ -245,12 +268,12 @@ def lpdf(self, X: ArrayLike) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ @abstractmethod - def log_gradients(self, X: ArrayLike) -> NDArray[float64]: + def log_gradients(self, X: ArrayLike) -> NDArray[DType]: """Calculates the gradients of the log-PDF with respect to its parameters. The gradients are computed for the parameters that are not fixed. @@ -262,7 +285,7 @@ def log_gradients(self, X: ArrayLike) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds to the @@ -270,7 +293,7 @@ def log_gradients(self, X: ArrayLike) -> NDArray[float64]: """ @abstractmethod - def generate(self, size: int) -> NDArray[float64]: + def generate(self, size: int) -> NDArray[DType]: """Generates random samples from the distribution. Parameters @@ -280,21 +303,49 @@ def generate(self, size: int) -> NDArray[float64]: Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. """ - def __copy__(self) -> "ContinuousDistribution": + def astype(self, new_dtype: type[DType]) -> "ContinuousDistribution[DType]": + """Creates a copy of the distribution with a new data type. + + If the specified `new_dtype` is the same as the instance's current `dtype`, + this method returns the original instance instead. + + Parameters + ---------- + new_dtype : type[DType] + The target NumPy data type for the new distribution instance. 
+ + Returns + ------- + ContinuousDistribution[DType] + A new distribution instance with all parameters converted to the + specified `new_dtype`, or the original instance if the `dtype` is + unchanged. + """ + if self._dtype is new_dtype: + return self + + params_dict = {p: new_dtype(getattr(self, p)) for p in self.params} + + new_instance = self.__class__(**params_dict, dtype=new_dtype) + new_instance._fixed_params = self._fixed_params.copy() + + return new_instance + + def __copy__(self) -> "ContinuousDistribution[DType]": """Creates a copy of the distribution instance. Returns ------- - ContinuousDistribution + ContinuousDistribution[DType] A new instance of the distribution, identical to the original. """ params_dict = {p: getattr(self, p) for p in self.params} - new_instance = self.__class__(**params_dict) + new_instance = self.__class__(**params_dict, dtype=self.dtype) new_instance._fixed_params = self._fixed_params.copy() return new_instance @@ -327,6 +378,7 @@ def __eq__(self, other: object): self.name == other.name and self.params == other.params and self.get_params_vector(sorted_params) == other.get_params_vector(sorted_params) + and self.dtype == other.dtype ) def __hash__(self) -> int: @@ -343,4 +395,4 @@ def __hash__(self) -> int: sorted_params = sorted(list(self.params)) param_values = tuple(self.get_params_vector(sorted_params)) - return hash(tuple([self.name, tuple(self.params), param_values])) + return hash(tuple([self.name, tuple(self.params), self.dtype, param_values])) diff --git a/rework_pysatl_mpest/distributions/exponential.py b/rework_pysatl_mpest/distributions/exponential.py index f228c728..176177dd 100644 --- a/rework_pysatl_mpest/distributions/exponential.py +++ b/rework_pysatl_mpest/distributions/exponential.py @@ -1,19 +1,19 @@ """Module providing exponential distribution class""" -__author__ = "Danil Totmyanin" +__author__ = "Danil Totmyanin, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = 
"SPDX-License-Identifier: MIT" import numpy as np -from numpy import float64 from scipy.stats import expon from ..core import Parameter +from ..typings import DType from .continuous_dist import ContinuousDistribution -class Exponential(ContinuousDistribution): +class Exponential(ContinuousDistribution[DType]): """Class for the two-parameter exponential distribution. Parameters @@ -50,8 +50,8 @@ class Exponential(ContinuousDistribution): loc = Parameter() rate = Parameter(lambda x: x > 0, "Rate parameter must be a positive") - def __init__(self, loc: float, rate: float): - super().__init__() + def __init__(self, loc: float, rate: float, dtype: type[DType] = np.float64): # type: ignore[assignment] + super().__init__(dtype=dtype) self.loc = loc self.rate = rate @@ -82,13 +82,14 @@ def pdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype - return np.where(self.loc <= X, self.rate * np.exp(-self.rate * (X - self.loc)), 0.0) + return np.where(self.loc <= X, self.rate * np.exp(-self.rate * (X - self.loc)), dtype(0.0)) def ppf(self, P): """Percent Point Function (PPF) or quantile function. @@ -106,13 +107,14 @@ def ppf(self, P): Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. """ - P = np.asarray(P, dtype=float64) + P = np.asarray(P, dtype=self.dtype) + dtype = self.dtype - return np.where((P >= 0) & (P <= 1), self.loc - np.log(1 - P) / self.rate, np.nan) + return np.where((P >= 0) & (P <= 1), self.loc - np.log(dtype(1) - P) / self.rate, dtype(np.nan)) def lpdf(self, X): """Log of the Probability Density Function (LPDF). @@ -130,12 +132,14 @@ def lpdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. 
""" - X = np.asarray(X, dtype=float64) - return np.where(self.loc <= X, np.log(self.rate) - self.rate * (X - self.loc), -np.inf) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return np.where(self.loc <= X, np.log(self.rate) - self.rate * (X - self.loc), dtype(-np.inf)) def _dlog_loc(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`loc` parameter. @@ -155,12 +159,14 @@ def _dlog_loc(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`loc` for each point in ::attr`X`. """ - X = np.asarray(X, dtype=float64) - return np.where(self.loc <= X, self.rate, 0.0) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return np.where(self.loc <= X, self.rate, dtype(0.0)) def _dlog_rate(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`rate` parameter. @@ -180,12 +186,13 @@ def _dlog_rate(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`rate` for each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) - return np.where(self.loc <= X, 1.0 / self.rate - (X - self.loc), 0.0) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + return np.where(self.loc <= X, dtype(1.0) / self.rate - (X - self.loc), dtype(0.0)) def log_gradients(self, X): """Calculates the gradients of the log-PDF w.r.t. its parameters. @@ -199,14 +206,14 @@ def log_gradients(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds to the sorted order of :attr:`self.params_to_optimize`. 
""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) gradient_calculators = { self.PARAM_LOC: self._dlog_loc, @@ -216,7 +223,7 @@ def log_gradients(self, X): optimizable_params = sorted(list(self.params_to_optimize)) if not optimizable_params: - return np.empty((len(X), 0)) + return np.empty((len(X), 0), dtype=self.dtype) gradients = [gradient_calculators[param](X) for param in optimizable_params] @@ -232,11 +239,11 @@ def generate(self, size: int): Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. """ - return np.asarray(expon.rvs(loc=self.loc, scale=1 / self.rate, size=size), dtype=float64) + return np.asarray(expon.rvs(loc=self.loc, scale=1 / self.rate, size=size), dtype=self.dtype) def __repr__(self) -> str: """Returns a string representation of the object. diff --git a/rework_pysatl_mpest/distributions/normal.py b/rework_pysatl_mpest/distributions/normal.py index e68a841e..2460fb71 100644 --- a/rework_pysatl_mpest/distributions/normal.py +++ b/rework_pysatl_mpest/distributions/normal.py @@ -1,18 +1,19 @@ """Module providing normal (Gaussian) distribution class""" -__author__ = "Danil Totmyanin" +__author__ = "Danil Totmyanin, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" + import numpy as np -from numpy import float64 from scipy.stats import norm from ..core import Parameter +from ..typings import DType from .continuous_dist import ContinuousDistribution -class Normal(ContinuousDistribution): +class Normal(ContinuousDistribution[DType]): """Class for the Normal (Gaussian) distribution. 
Parameters @@ -49,8 +50,8 @@ class Normal(ContinuousDistribution): loc = Parameter() scale = Parameter(lambda x: x > 0, "Scale parameter must be positive") - def __init__(self, loc: float, scale: float): - super().__init__() + def __init__(self, loc: float, scale: float, dtype: type[DType] = np.float64): # type: ignore[assignment] + super().__init__(dtype=dtype) self.loc = loc self.scale = scale @@ -82,13 +83,15 @@ def pdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z = (X - self.loc) / self.scale - return np.exp(-(z**2) / 2.0) / (self.scale * np.sqrt(2.0 * np.pi)) + return np.exp(-(z**2) / dtype(2.0)) / (self.scale * np.sqrt(dtype(2.0) * dtype(np.pi))) def ppf(self, P): """Percent Point Function (PPF) or quantile function. @@ -104,12 +107,14 @@ def ppf(self, P): Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. """ - P = np.asarray(P, dtype=float64) - return norm.ppf(P, loc=self.loc, scale=self.scale) + P = np.asarray(P, dtype=self.dtype) + result = norm.ppf(P, loc=self.loc, scale=self.scale) + + return np.asarray(result, dtype=self.dtype) def lpdf(self, X): """Log of the Probability Density Function (LPDF). @@ -128,26 +133,30 @@ def lpdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z = (X - self.loc) / self.scale - return -np.log(self.scale) - 0.5 * np.log(2.0 * np.pi) - 0.5 * z**2 + return -np.log(self.scale) - dtype(0.5) * np.log(dtype(2.0) * dtype(np.pi)) - dtype(0.5) * z**2 def _dlog_loc(self, X): """Partial derivative of the lpdf w.r.t. 
the loc parameter.""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) return (X - self.loc) / (self.scale**2) def _dlog_scale(self, X): """Partial derivative of the lpdf w.r.t. the scale parameter.""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z_sq = ((X - self.loc) / self.scale) ** 2 - return (z_sq - 1.0) / self.scale + return (z_sq - dtype(1.0)) / self.scale def log_gradients(self, X): """Calculates the gradients of the log-PDF w.r.t. its parameters. @@ -159,14 +168,14 @@ def log_gradients(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds to the sorted order of :attr:`self.params_to_optimize`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) gradient_calculators = { self.PARAM_LOC: self._dlog_loc, @@ -176,7 +185,7 @@ def log_gradients(self, X): optimizable_params = sorted(list(self.params_to_optimize)) if not optimizable_params: - return np.empty((len(X), 0)) + return np.empty((len(X), 0), dtype=self.dtype) gradients = [gradient_calculators[param](X) for param in optimizable_params] return np.stack(gradients, axis=1) @@ -193,11 +202,11 @@ def generate(self, size: int): Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. """ - return np.asarray(norm.rvs(loc=self.loc, scale=self.scale, size=size), dtype=float64) + return np.asarray(norm.rvs(loc=self.loc, scale=self.scale, size=size), dtype=self.dtype) def __repr__(self) -> str: """Returns a string representation of the object. 
diff --git a/rework_pysatl_mpest/distributions/pareto.py b/rework_pysatl_mpest/distributions/pareto.py index 0ec75bf3..e9ed9866 100644 --- a/rework_pysatl_mpest/distributions/pareto.py +++ b/rework_pysatl_mpest/distributions/pareto.py @@ -1,19 +1,19 @@ """Module providing pareto type 1 distribution class""" -__author__ = "Maksim Pastukhov" +__author__ = "Maksim Pastukhov, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" import numpy as np -from numpy import float64 from scipy.stats import pareto -from rework_pysatl_mpest.core.parameter import Parameter -from rework_pysatl_mpest.distributions.continuous_dist import ContinuousDistribution +from ..core.parameter import Parameter +from ..distributions.continuous_dist import ContinuousDistribution +from ..typings import DType -class Pareto(ContinuousDistribution): +class Pareto(ContinuousDistribution[DType]): """Class for the two-parameter Pareto distribution. The Pareto distribution is a power-law probability distribution commonly used @@ -53,8 +53,8 @@ class Pareto(ContinuousDistribution): shape = Parameter(lambda x: x > 0, "Shape parameter must be a positive") scale = Parameter(lambda x: x > 0, "Scale parameter must be a positive") - def __init__(self, shape: float, scale: float): - super().__init__() + def __init__(self, shape: float, scale: float, dtype: type[DType] = np.float64): # type: ignore[assignment] + super().__init__(dtype=dtype) self.shape = shape self.scale = scale @@ -78,9 +78,12 @@ def pdf(self, X): where :math:`\\alpha` is the :attr:`shape` parameter and :math:`\\beta` is the :attr:`scale` parameter. The function is zero for :math:`x < \\beta`. 
""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype - return np.where(self.scale <= X, (self.shape * (self.scale**self.shape)) / X ** (self.shape + 1), 0.0) + return np.where( + self.scale <= X, (self.shape * (self.scale**self.shape)) / X ** (self.shape + dtype(1)), dtype(0.0) + ) def ppf(self, P): """Percent Point Function (PPF) or quantile function. @@ -98,13 +101,14 @@ def ppf(self, P): Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. """ - P = np.asarray(P, dtype=float64) + P = np.asarray(P, dtype=self.dtype) + dtype = self.dtype - return np.where((P >= 0) & (P <= 1), self.scale * (1 - P) ** (-1.0 / self.shape), np.nan) + return np.where((P >= 0) & (P <= 1), self.scale * (dtype(1) - P) ** (dtype(-1.0) / self.shape), dtype(np.nan)) def lpdf(self, X): """Log of the Probability Density Function (LPDF). @@ -125,15 +129,17 @@ def lpdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + return np.where( self.scale <= X, - np.log(self.shape) + self.shape * np.log(self.scale) - (1 + self.shape) * np.log(X), - -np.inf, + np.log(self.shape) + self.shape * np.log(self.scale) - (dtype(1) + self.shape) * np.log(X), + dtype(-np.inf), ) def _dlog_shape(self, X): @@ -144,7 +150,7 @@ def _dlog_shape(self, X): .. math:: \\frac{\\partial \\ln f(x | \\alpha, \\beta)}{\\partial \\alpha} = - \\ln \\beta - \\ln x + \\frac{1}{\\alpha} + \\ln \\beta - \\ln x where :math:`\\alpha` is the :attr:`shape` parameter and :math:`\\beta` is the :attr:`scale` parameter. @@ -156,12 +162,14 @@ def _dlog_shape(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`shape` for each point in :attr:`X`. 
""" - X = np.asarray(X, dtype=float64) - return np.where(self.scale <= X, 1.0 / self.shape + np.log(self.scale) - np.log(X), 0.0) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return np.where(self.scale <= X, dtype(1.0) / self.shape + np.log(self.scale) - np.log(X), dtype(0.0)) def _dlog_scale(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`scale` parameter. @@ -182,12 +190,14 @@ def _dlog_scale(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The gradient of the lpdf with respect to :attr:`scale` for each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) - return np.where(self.scale <= X, self.shape / self.scale, 0.0) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + + return np.where(self.scale <= X, self.shape / self.scale, dtype(0.0)) def log_gradients(self, X): """Calculates the gradients of the log-PDF w.r.t. its parameters. @@ -201,14 +211,14 @@ def log_gradients(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds to the sorted order of :attr:`self.params_to_optimize`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) gradient_calculators = { self.PARAM_SHAPE: self._dlog_shape, @@ -218,7 +228,7 @@ def log_gradients(self, X): optimizable_params = sorted(list(self.params_to_optimize)) if not optimizable_params: - return np.empty((len(X), 0)) + return np.empty((len(X), 0), dtype=self.dtype) gradients = [gradient_calculators[param](X) for param in optimizable_params] @@ -234,11 +244,11 @@ def generate(self, size: int): Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. 
""" - return np.asarray(pareto.rvs(scale=self.scale, b=self.shape, size=size), dtype=float64) + return np.asarray(pareto.rvs(scale=self.scale, b=self.shape, size=size), dtype=self.dtype) def __repr__(self) -> str: """Returns a string representation of the object. diff --git a/rework_pysatl_mpest/distributions/uniform.py b/rework_pysatl_mpest/distributions/uniform.py index e5e4eb2f..d4b0aec9 100644 --- a/rework_pysatl_mpest/distributions/uniform.py +++ b/rework_pysatl_mpest/distributions/uniform.py @@ -1,18 +1,19 @@ """Module providing uniform distribution class""" -__author__ = "Maksim Pastukhov" +__author__ = "Maksim Pastukhov, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" + import numpy as np -from numpy import float64 from scipy.stats import uniform from ..core import Parameter +from ..typings import DType from .continuous_dist import ContinuousDistribution -class Uniform(ContinuousDistribution): +class Uniform(ContinuousDistribution[DType]): """ The Uniform continuous probability distribution. @@ -59,8 +60,8 @@ class Uniform(ContinuousDistribution): left_border = Parameter() right_border = Parameter() - def __init__(self, left_border: float, right_border: float): - super().__init__() + def __init__(self, left_border: float, right_border: float, dtype: type[DType] = np.float64): # type: ignore[assignment] + super().__init__(dtype=dtype) if left_border >= right_border: raise ValueError("right_border parameter must be strictly greater than left_border") if not (np.isfinite(left_border) and np.isfinite(right_border)): @@ -99,9 +100,13 @@ def pdf(self, X): NDArray[np.float64] The PDF values corresponding to each point in :attr:`X`. 
""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + return np.where( - (self.left_border <= X) & (self.right_border >= X), 1.0 / (self.right_border - self.left_border), 0.0 + (self.left_border <= X) & (self.right_border >= X), + dtype(1.0) / (self.right_border - self.left_border), + dtype(0.0), ) def ppf(self, P): @@ -126,8 +131,12 @@ def ppf(self, P): NDArray[np.float64] The PPF values corresponding to each probability in :attr:`P`. """ - P = np.asarray(P, dtype=float64) - return np.where((P >= 0) & (P <= 1), self.left_border + P * (self.right_border - self.left_border), np.nan) + P = np.asarray(P, dtype=self.dtype) + dtype = self.dtype + + return np.where( + (P >= 0) & (P <= 1), self.left_border + P * (self.right_border - self.left_border), dtype(np.nan) + ) def lpdf(self, X): """Log of the Probability Density Function (LPDF). @@ -152,10 +161,12 @@ def lpdf(self, X): NDArray[np.float64] The log-PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + in_range = (self.left_border <= X) & (self.right_border >= X) valid_dist = self.right_border > self.left_border - return np.where(in_range & valid_dist, -np.log(self.right_border - self.left_border), -np.inf) + return np.where(in_range & valid_dist, -np.log(self.right_border - self.left_border), dtype(-np.inf)) def _dlog_left_border(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`left_border` parameter. @@ -168,9 +179,11 @@ def _dlog_left_border(self, X): right_border parameter. The derivative is non-zero only for `left_border <= X <= right_border`. 
""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + in_range = (self.left_border <= X) & (self.right_border >= X) - return np.where(in_range, 1.0 / (self.right_border - self.left_border), 0.0) + return np.where(in_range, dtype(1.0) / (self.right_border - self.left_border), dtype(0.0)) def _dlog_right_border(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`right_border` parameter. @@ -183,9 +196,11 @@ def _dlog_right_border(self, X): right_border parameter. The derivative is non-zero only for `left_border <= X <= right_border`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + in_range = (self.left_border <= X) & (self.right_border >= X) - return np.where(in_range, -1.0 / (self.right_border - self.left_border), 0.0) + return np.where(in_range, dtype(-1.0) / (self.right_border - self.left_border), dtype(0.0)) def log_gradients(self, X): """Calculates the gradients of the log-PDF w.r.t. its parameters. @@ -206,7 +221,7 @@ def log_gradients(self, X): to the sorted order of :attr:`self.params_to_optimize`. 
""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) gradient_calculators = { self.LEFT_BORDER: self._dlog_left_border, @@ -216,7 +231,7 @@ def log_gradients(self, X): optimizable_params = sorted(list(self.params_to_optimize)) if not optimizable_params: - return np.empty((len(X), 0)) + return np.empty((len(X), 0), dtype=self.dtype) gradients = [gradient_calculators[param](X) for param in optimizable_params] @@ -237,7 +252,7 @@ def generate(self, size: int): """ return np.asarray( - uniform.rvs(loc=self.left_border, scale=self.right_border - self.left_border, size=size), dtype=float64 + uniform.rvs(loc=self.left_border, scale=self.right_border - self.left_border, size=size), dtype=self.dtype ) def __repr__(self) -> str: diff --git a/rework_pysatl_mpest/distributions/weibull.py b/rework_pysatl_mpest/distributions/weibull.py index 3d9788f3..2e966b97 100644 --- a/rework_pysatl_mpest/distributions/weibull.py +++ b/rework_pysatl_mpest/distributions/weibull.py @@ -1,19 +1,19 @@ """Module providing three-parametric weibull distribution class""" -__author__ = "Danil Totmyanin" +__author__ = "Danil Totmyanin, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" import numpy as np -from numpy import float64 from scipy.stats import weibull_min from ..core import Parameter +from ..typings import DType from .continuous_dist import ContinuousDistribution -class Weibull(ContinuousDistribution): +class Weibull(ContinuousDistribution[DType]): """Class for the three-parameter Weibull distribution. 
Parameters @@ -55,8 +55,8 @@ class Weibull(ContinuousDistribution): loc = Parameter() scale = Parameter(lambda x: x > 0, "Scale parameter must be positive") - def __init__(self, shape: float, loc: float, scale: float): - super().__init__() + def __init__(self, shape: float, loc: float, scale: float, dtype: type[DType] = np.float64): # type: ignore[assignment] + super().__init__(dtype=dtype) self.shape = shape self.loc = loc self.scale = scale @@ -91,19 +91,21 @@ def pdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z = (X - self.loc) / self.scale # PDF is 0 for x < loc, and handle cases where z=0 and shape<1 # which would lead to division by zero. with np.errstate(divide="ignore", invalid="ignore"): - pdf_vals = (self.shape / self.scale) * np.power(z, self.shape - 1) * np.exp(-np.power(z, self.shape)) + pdf_vals = (self.shape / self.scale) * np.power(z, self.shape - dtype(1)) * np.exp(-np.power(z, self.shape)) - return np.where(self.loc <= X, np.nan_to_num(pdf_vals, nan=0.0, posinf=np.inf), 0.0) + return np.where(self.loc <= X, np.nan_to_num(pdf_vals, nan=dtype(0.0), posinf=dtype(np.inf)), dtype(0.0)) def ppf(self, P): """Percent Point Function (PPF) or quantile function. @@ -122,13 +124,15 @@ def ppf(self, P): Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. """ - P = np.asarray(P, dtype=float64) - ppf_vals = self.loc + self.scale * np.power(-np.log(1 - P), 1.0 / self.shape) - return np.where((P >= 0) & (P <= 1), ppf_vals, np.nan) + P = np.asarray(P, dtype=self.dtype) + dtype = self.dtype + + ppf_vals = self.loc + self.scale * np.power(-np.log(dtype(1) - P), dtype(1.0) / self.shape) + return np.where((P >= 0) & (P <= 1), ppf_vals, dtype(np.nan)) def lpdf(self, X): """Log of the Probability Density Function (LPDF). 
@@ -148,41 +152,53 @@ def lpdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z = (X - self.loc) / self.scale with np.errstate(divide="ignore"): - lpdf_vals = np.log(self.shape) - np.log(self.scale) + (self.shape - 1) * np.log(z) - np.power(z, self.shape) - return np.where(self.loc < X, lpdf_vals, -np.inf) + lpdf_vals = ( + np.log(self.shape) - np.log(self.scale) + (self.shape - dtype(1)) * np.log(z) - np.power(z, self.shape) + ) + return np.where(self.loc < X, lpdf_vals, dtype(-np.inf)) def _dlog_shape(self, X): """Partial derivative of the lpdf w.r.t. the shape parameter.""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z = (X - self.loc) / self.scale with np.errstate(divide="ignore", invalid="ignore"): - grad = 1.0 / self.shape + np.log(z) - np.power(z, self.shape) * np.log(z) - return np.where(self.loc < X, np.nan_to_num(grad), 0.0) + grad = dtype(1.0) / self.shape + np.log(z) - np.power(z, self.shape) * np.log(z) + return np.where(self.loc < X, np.nan_to_num(grad), dtype(0.0)) def _dlog_loc(self, X): """Partial derivative of the lpdf w.r.t. the loc parameter.""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z = (X - self.loc) / self.scale with np.errstate(divide="ignore", invalid="ignore"): - grad = -(self.shape - 1) / (X - self.loc) + (self.shape / self.scale) * np.power(z, self.shape - 1) - return np.where(self.loc < X, np.nan_to_num(grad), 0.0) + grad = -(self.shape - dtype(1)) / (X - self.loc) + (self.shape / self.scale) * np.power( + z, self.shape - dtype(1) + ) + return np.where(self.loc < X, np.nan_to_num(grad), dtype(0.0)) def _dlog_scale(self, X): """Partial derivative of the lpdf w.r.t. 
the scale parameter.""" - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) + dtype = self.dtype + z = (X - self.loc) / self.scale grad = -self.shape / self.scale + (self.shape / self.scale) * np.power(z, self.shape) - return np.where(self.loc < X, grad, 0.0) + return np.where(self.loc < X, grad, dtype(0.0)) def log_gradients(self, X): """Calculates the gradients of the log-PDF w.r.t. its parameters. @@ -194,13 +210,13 @@ def log_gradients(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds to the sorted order of :attr:`self.params_to_optimize`. """ - X = np.asarray(X, dtype=float64) + X = np.asarray(X, dtype=self.dtype) gradient_calculators = { self.PARAM_SHAPE: self._dlog_shape, @@ -211,7 +227,7 @@ def log_gradients(self, X): optimizable_params = sorted(list(self.params_to_optimize)) if not optimizable_params: - return np.empty((len(X), 0)) + return np.empty((len(X), 0), dtype=self.dtype) gradients = [gradient_calculators[param](X) for param in optimizable_params] @@ -227,11 +243,11 @@ def generate(self, size: int): Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. """ - return np.asarray(weibull_min.rvs(c=self.shape, loc=self.loc, scale=self.scale, size=size), dtype=float64) + return np.asarray(weibull_min.rvs(c=self.shape, loc=self.loc, scale=self.scale, size=size), dtype=self.dtype) def __repr__(self) -> str: """Returns a string representation of the object. 
diff --git a/rework_pysatl_mpest/initializers/clusterize_initializer.py b/rework_pysatl_mpest/initializers/clusterize_initializer.py index 70388206..6b9e644b 100644 --- a/rework_pysatl_mpest/initializers/clusterize_initializer.py +++ b/rework_pysatl_mpest/initializers/clusterize_initializer.py @@ -333,5 +333,5 @@ def perform( total_weight = sum(weights) normalized_weights: list[float] = [w / total_weight for w in weights] - current_mixture = MixtureModel(distributions, normalized_weights) + current_mixture = MixtureModel(distributions, normalized_weights) # type: ignore[var-annotated] return current_mixture diff --git a/rework_pysatl_mpest/typings.py b/rework_pysatl_mpest/typings.py new file mode 100644 index 00000000..c50a65f1 --- /dev/null +++ b/rework_pysatl_mpest/typings.py @@ -0,0 +1,11 @@ +"""A module that provides custom types for the project.""" + +__author__ = "Aleksandra Ri" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from typing import TypeVar + +import numpy as np + +DType = TypeVar("DType", bound=np.floating) diff --git a/rework_tests/unit/core/test_mixture.py b/rework_tests/unit/core/test_mixture.py index 0ad7e922..af5d08da 100644 --- a/rework_tests/unit/core/test_mixture.py +++ b/rework_tests/unit/core/test_mixture.py @@ -89,6 +89,40 @@ def test_init_with_empty_components_raises_value_error(self): with pytest.raises(ValueError, match="List of components cannot be an empty"): MixtureModel(components=[]) + def test_init_casts_component_dtypes(self): + """Tests that the MixtureModel casts all components to its own dtype during initialization.""" + comp1_f64 = Exponential(loc=0.0, rate=1.0) + comp2_f64 = Exponential(loc=5.0, rate=2.0) + assert comp1_f64.dtype == np.float64 + + target_dtype = np.float32 + mixture = MixtureModel(components=[comp1_f64, comp2_f64], dtype=target_dtype) + + assert mixture.dtype == target_dtype + assert mixture.weights.dtype == target_dtype + + for component in 
mixture.components: + assert component.dtype == target_dtype + for param in component.params: + assert isinstance(getattr(component, param), target_dtype) + + # Original components have not changed + for component in [comp1_f64, comp2_f64]: + assert component.dtype == np.float64 + for param in component.params: + assert isinstance(getattr(component, param), np.float64) + + def test_init_does_not_recreate_components_with_correct_dtype(self): + """Tests that components with the correct dtype are not recreated.""" + target_dtype = np.float32 + comp_f32 = Exponential(loc=0.0, rate=1.0, dtype=target_dtype) + + original_id = id(comp_f32) + + mixture = MixtureModel(components=[comp_f32], dtype=target_dtype) + + assert id(mixture.components[0]) == original_id + class TestMixtureModelProperties: """Tests for the properties of the MixtureModel class.""" @@ -143,6 +177,14 @@ def test_log_weights_setter_with_invalid_shape_raises_error(self, mixture_model: with pytest.raises(ValueError, match="The length of the new logit vector does not match"): mixture_model.log_weights = np.log([0.1, 0.2, 0.7]) + def test_properties_have_correct_dtype(self): + """Tests that checks the dtype of the weights and log_weights properties.""" + target_dtype = np.float32 + mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) + + assert mixture.weights.dtype == target_dtype + assert mixture.log_weights.dtype == target_dtype + class TestMixtureModelModification: """Tests for methods that modify the MixtureModel instance.""" @@ -175,6 +217,26 @@ def test_add_component_with_invalid_weight_raises_error(self, mixture_model: Mix with pytest.raises(ValueError, match="The weight of the new component must be in the range"): mixture_model.add_component(Exponential(10, 3), weight=invalid_weight) + def test_add_component_casts_dtype(self): + """Tests that the add_component method casts the type of the new component to the dtype of the mixture.""" + comp = Exponential(loc=0.0, rate=1.0) + target_dtype = 
np.float32 + mixture = MixtureModel(components=[comp], dtype=target_dtype) + + new_comp_f64 = Exponential(loc=10.0, rate=2.0) + assert new_comp_f64.dtype == np.float64 + + mixture.add_component(new_comp_f64, weight=0.3) + + added_component_in_mixture = mixture.components[-1] + assert added_component_in_mixture.dtype == target_dtype + assert isinstance(added_component_in_mixture.loc, target_dtype) + + # Original component have not changed + assert comp.dtype == np.float64 + for param in comp.params: + assert isinstance(getattr(comp, param), np.float64) + def test_remove_component(self): """Tests removing a component and verifies weight renormalization.""" @@ -233,6 +295,18 @@ def test_lpdf_calculation(self, mixture_model: MixtureModel, X): assert isinstance(calculated_lpdf, np.ndarray) np.testing.assert_allclose(calculated_lpdf, expected_lpdf) + @pytest.mark.parametrize("method_name", ["pdf", "lpdf"]) + def test_pdf_lpdf_methods_return_correct_dtype(self, method_name: str): + """Tests that pdf and lpdf methods return arrays of the correct dtype.""" + target_dtype = np.float32 + mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) + method_to_test = getattr(mixture, method_name) + + input_x = np.array([1.0, 2.0, 3.0]) + result = method_to_test(input_x) + + assert result.dtype == target_dtype + def test_loglikelihood_calculation(self, mixture_model: MixtureModel): """Tests that loglikelihood is the sum of LPDF values.""" @@ -240,9 +314,18 @@ def test_loglikelihood_calculation(self, mixture_model: MixtureModel): expected_loglikelihood = np.sum(mixture_model.lpdf(X)) calculated_loglikelihood = mixture_model.loglikelihood(X) - assert isinstance(calculated_loglikelihood, float) + assert isinstance(calculated_loglikelihood, np.float64) assert np.isclose(calculated_loglikelihood, expected_loglikelihood) + def test_loglikelihood_returns_numpy_scalar_with_correct_dtype(self): + """Tests that checks that loglikelihood returns a NumPy scalar of the correct type.""" + 
target_dtype = np.float32 + mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) + + loglik = mixture.loglikelihood(np.array([1, 2, 3])) + + assert isinstance(loglik, target_dtype) + class TestMixtureModelGenerate: """Statistical tests for the `generate` method.""" @@ -298,6 +381,16 @@ def test_generate_statistical_properties(self, seed): proportion_c1 = len(samples_from_c1) / n_samples assert proportion_c1 == pytest.approx(weights[0], abs=0.05) + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_array_with_correct_dtype(self, size): + """Tests that generate returns an array with the correct dtype.""" + target_dtype = np.float32 + mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) + + empty_array = mixture.generate(size=size) + assert empty_array.shape == (size,) + assert empty_array.dtype == target_dtype + class TestMixtureModelDunderMethods: """Tests for special (dunder) methods of MixtureModel.""" @@ -443,6 +536,13 @@ def test_neq_different_n_components(self): m2 = MixtureModel(components=c2, weights=[1.0]) assert m1 != m2 + def test_neq_other_object(self, mixture_model, exp_components): + """Tests that a model instance is not equal to an object of a different class.""" + model = mixture_model + + other = "not_a_mixture_model" + assert model != other + def test_hash_consistency(self): """Tests that equal models produce the same hash.""" diff --git a/rework_tests/unit/core/test_parameter.py b/rework_tests/unit/core/test_parameter.py index de5d0753..13fdfe84 100644 --- a/rework_tests/unit/core/test_parameter.py +++ b/rework_tests/unit/core/test_parameter.py @@ -1,10 +1,10 @@ """Tests for Parameter class""" -__author__ = "Danil Totmyanin" +__author__ = "Danil Totmyanin, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" - +import numpy as np import pytest from rework_pysatl_mpest.core import Parameter @@ -29,6 +29,23 @@ def __init__(self, positive_val: float, 
any_val: float): self.any_param = any_val +class _OwnerClassWithDType: + """A helper class that has a dtype attribute.""" + + positive_param = Parameter(invariant=lambda x: x > 0, error_message="Value must be positive.") + any_param = Parameter() + + def __init__(self, positive_val: float, any_val: float, dtype=np.float64): + """ + Initializes the owner class and its parameters. + Also initializes a set to keep track of fixed parameters. + """ + self.dtype = dtype + self._fixed_params: set[str] = set() + self.positive_param = positive_val + self.any_param = any_val + + @pytest.fixture def owner_instance() -> _OwnerClass: """ @@ -38,6 +55,15 @@ def owner_instance() -> _OwnerClass: return _OwnerClass(positive_val=10.0, any_val=-5.0) +@pytest.fixture +def owner_instance_float32() -> _OwnerClassWithDType: + """ + Pytest fixture to provide a clean instance of _OwnerClassWithDType for each test. + """ + + return _OwnerClassWithDType(positive_val=10.0, any_val=-5.0, dtype=np.float32) + + def test_parameter_initialization(): """ Tests that the Parameter descriptor is initialized correctly with @@ -179,3 +205,33 @@ def test_can_set_unfixed_parameter_after_fixing_another(owner_instance: _OwnerCl assert owner_instance.any_param == expected_any_value assert owner_instance.positive_param == expected_positive_value + + +def test_get_from_instance_with_dtype_returns_correct_type(owner_instance_float32: _OwnerClassWithDType): + """ + Tests that __get__ returns a value of the correct DType when the owner + instance has a `dtype` attribute. + """ + + positive_value = owner_instance_float32.positive_param + any_value = owner_instance_float32.any_param + + assert isinstance(positive_value, np.float32) + assert isinstance(any_value, np.float32) + + assert positive_value == np.float32(10.0) + assert any_value == np.float32(-5.0) + + +def test_set_and_get_with_dtype_casting(owner_instance_float32: _OwnerClassWithDType): + """ + Tests the full set -> get cycle with dtype casting. 
+ """ + + new_positive_value = 123.45 + owner_instance_float32.positive_param = new_positive_value + + retrieved_value = owner_instance_float32.positive_param + + assert isinstance(retrieved_value, np.float32) + assert retrieved_value == np.float32(new_positive_value) diff --git a/rework_tests/unit/distributions/test_beta.py b/rework_tests/unit/distributions/test_beta.py index 013393a7..b185e3e8 100644 --- a/rework_tests/unit/distributions/test_beta.py +++ b/rework_tests/unit/distributions/test_beta.py @@ -7,17 +7,21 @@ import random from pathlib import Path +from typing import ClassVar import numpy as np import pandas as pd import pytest -from hypothesis import given +from hypothesis import assume, given from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Beta +from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import beta, kstest +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + @st.composite def st_valid_params(draw): @@ -174,7 +178,7 @@ def test_pdf_integral_is_one(self, params): """Tests that the integral of the PDF over its support is equal to 1.""" shape1, shape2, left_border, right_border = params dist = Beta(shape1, shape2, left_border, right_border) - integral, error = quad(dist.pdf, left_border, right_border) + integral, error = quad(lambda x: dist.pdf(x).item(), left_border, right_border) np.testing.assert_allclose(1.0, integral, rtol=1e-6, atol=1e-8) @given(x=st.floats(min_value=1e-4, max_value=1e2, allow_infinity=False)) @@ -202,6 +206,9 @@ def test_lpdf_against_scipy(self, params, x): """Compares the custom LPDF implementation against scipy's implementation.""" shape1, shape2, left_border, right_border = params + + assume(left_border < x < right_border) + dist = Beta(shape1, shape2, left_border, right_border) custom_lpdf = dist.lpdf(x) scipy_lpdf = beta.logpdf(x, shape1, shape2, 
loc=left_border, scale=right_border - left_border) @@ -422,3 +429,30 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "beta", args=(shape1, shape2, left_border, right_border - left_border)) lower_bound = 0.05 assert p_value > lower_bound + + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) +class TestBetaDType(DTypeHandlingMixin): + distribution_class = Beta + default_params: ClassVar[dict] = {"alpha": 1.0, "beta": 2.0, "left_border": -1.0, "right_border": 1.0} + + def test_init_with_dtype_sets_correct_types(self, dtype): + self.check_init_with_dtype_sets_correct_types(dtype) + + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_correct_dtype(self, size, dtype): + self.check_generate_returns_correct_dtype(size, dtype) + + @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1, 1))) + def test_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): + self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) + + @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) + def test_ppf_returns_correct_dtype(self, p_data, dtype): + self.check_ppf_returns_correct_dtype(p_data, dtype) + + @pytest.mark.parametrize("method_name", ["_dlog_alpha", "_dlog_beta", "_dlog_left_border", "_dlog_right_border"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1, 1))) + def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_cauchy.py b/rework_tests/unit/distributions/test_cauchy.py index 7b349fdc..e80e91fd 100644 --- a/rework_tests/unit/distributions/test_cauchy.py +++ b/rework_tests/unit/distributions/test_cauchy.py @@ -6,6 +6,7 @@ import random +from typing import ClassVar import 
numpy as np import pytest @@ -13,9 +14,12 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Cauchy +from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import cauchy, kstest +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + st_scale = st.floats(min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False) st_loc = st.floats(min_value=-1e3, max_value=1e3, allow_nan=False, allow_infinity=False) @@ -100,7 +104,7 @@ def test_pdf_integral_is_one(self, loc, scale): """Tests that the integral of the PDF over its support is equal to 1.""" dist = Cauchy(loc=loc, scale=scale) - integral, error = quad(dist.pdf, loc - 186_124 * scale, loc + 186_124 * scale) + integral, error = quad(lambda x: dist.pdf(x).item(), loc - 186_124 * scale, loc + 186_124 * scale) print(f"integral = {integral}, loc = {loc}, scale = {scale}") np.testing.assert_allclose(1.0, integral, rtol=1e-5) @@ -268,3 +272,30 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "cauchy", args=(loc, scale)) lower_bound = 0.05 assert p_value > lower_bound + + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) +class TestCauchyDType(DTypeHandlingMixin): + distribution_class = Cauchy + default_params: ClassVar[dict] = {"loc": 0.0, "scale": 1.0} + + def test_init_with_dtype_sets_correct_types(self, dtype): + self.check_init_with_dtype_sets_correct_types(dtype) + + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_correct_dtype(self, size, dtype): + self.check_generate_returns_correct_dtype(size, dtype) + + @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e3))) + def test_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): +
self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) + + @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) + def test_ppf_returns_correct_dtype(self, p_data, dtype): + self.check_ppf_returns_correct_dtype(p_data, dtype) + + @pytest.mark.parametrize("method_name", ["_dlog_loc", "_dlog_scale"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e6))) + def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_continuous_distribution.py b/rework_tests/unit/distributions/test_continuous_distribution.py index 653bf36a..06e3eb3f 100644 --- a/rework_tests/unit/distributions/test_continuous_distribution.py +++ b/rework_tests/unit/distributions/test_continuous_distribution.py @@ -6,11 +6,13 @@ from copy import copy +from typing import ClassVar import numpy as np import pytest from numpy.typing import ArrayLike, NDArray -from rework_pysatl_mpest.distributions import ContinuousDistribution +from rework_pysatl_mpest.core import Parameter +from rework_pysatl_mpest.distributions import ContinuousDistribution, Exponential, Normal # Dummy distribution classes # -------------------------- @@ -23,30 +25,17 @@ class DummyDistribution(ContinuousDistribution): and test the non-abstract methods of the base class.
""" - def __init__(self, param1: float = 1.0, param2: float = 2.0, name: str = "Dummy"): + param1 = Parameter() + param2 = Parameter() + + def __init__(self, param1: float = 1.0, param2: float = 2.0, name: str = "Dummy", dtype: np.floating = np.float64): """Initializes with two simple parameters.""" - super().__init__() - self._param1 = param1 - self._param2 = param2 + super().__init__(dtype=dtype) + self.param1 = param1 + self.param2 = param2 self._name = name - @property - def param1(self): - return self._param1 - - @param1.setter - def param1(self, value): - self._param1 = value - - @property - def param2(self): - return self._param2 - - @param2.setter - def param2(self, value): - self._param2 = value - @property def name(self) -> str: return self._name @@ -98,6 +87,11 @@ def dummy_dist() -> DummyDistribution: return DummyDistribution(param1=10.0, param2=20.0) +@pytest.fixture +def dummy_float32_dist() -> DummyDistribution: + return DummyDistribution(param1=10.0, param2=20.0, dtype=np.float32) + + @pytest.fixture def dummy_inf_dist() -> DummyInfLpdfDistribution: return DummyInfLpdfDistribution() @@ -193,6 +187,13 @@ def test_get_params_vector_invalid_name_raises_error(self, dummy_dist: DummyDist with pytest.raises(ValueError, match="Invalid parameter names provided"): dummy_dist.get_params_vector(["param1", "invalid_param"]) + def test_get_params_vector_returns_correct_types(self, dummy_float32_dist: DummyDistribution): + """Tests that get_params_vector returns a list of scalars with the correct dtype.""" + param_names = ["param1", "param2"] + vector = dummy_float32_dist.get_params_vector(param_names) + for param in vector: + assert isinstance(param, np.float32) + # Test set_params_from_vector method # ---------------------------------- @@ -237,6 +238,84 @@ def test_set_params_from_vector_raises_errors( with pytest.raises(ValueError, match=error_msg_match): dummy_dist.set_params_from_vector(param_names, vector) + @pytest.mark.parametrize( + "param_names, 
vector_to_set", + [ + (["param1", "param2"], [100.0, 200.0]), + (("param2",), (99.0,)), + (["param1", "param2"], np.array([1.5, 2.5])), + ], + ) + def test_set_params_from_vector_correct_dtype( + self, dummy_float32_dist: DummyDistribution, param_names, vector_to_set + ): + """Tests setting parameter values from a vector.""" + + dummy_float32_dist.set_params_from_vector(param_names, vector_to_set) + + for param_name in param_names: + param_value = getattr(dummy_float32_dist, param_name) + assert isinstance(param_value, np.float32) + + # Test to_dtype method + # ---------------------------------- + + def test_to_dtype_successful_conversion(self, dummy_dist: DummyDistribution): + """ + Tests that _to_dtype creates a new instance with the correct new dtype + and that the original instance remains unchanged. + """ + assert dummy_dist.dtype == np.float64 + for param in dummy_dist.params: + assert isinstance(getattr(dummy_dist, param), np.float64) + + target_dtype = np.float32 + new_dist = dummy_dist.astype(target_dtype) + + assert new_dist is not dummy_dist + assert new_dist != dummy_dist + + assert new_dist.dtype == np.float32 + for param in new_dist.params: + assert isinstance(getattr(new_dist, param), target_dtype) + assert getattr(new_dist, param) == np.float32(getattr(dummy_dist, param)) + + # original instance remains unchanged + assert dummy_dist.dtype == np.float64 + for param in dummy_dist.params: + assert isinstance(getattr(dummy_dist, param), np.float64) + + def test_to_dtype_returns_self_if_same_dtype(self, dummy_dist: DummyDistribution): + """ + Tests that _to_dtype returns the same instance if the target dtype + is identical to the current one, avoiding unnecessary copying. 
+ """ + assert dummy_dist.dtype == np.float64 + + same_dtype_dist = dummy_dist.astype(np.float64) + + assert same_dtype_dist is dummy_dist + + def test_to_dtype_preserves_fixed_params(self, dummy_dist: DummyDistribution): + """ + Tests that the set of fixed parameters is correctly copied to the + new instance after dtype conversion. + """ + dummy_dist.fix_param("param1") + assert "param1" in dummy_dist._fixed_params + + new_dist = dummy_dist.astype(np.float32) + + assert new_dist.dtype == np.float32 + for param in new_dist.params: + assert isinstance(getattr(new_dist, param), np.float32) + assert getattr(new_dist, param) == np.float32(getattr(dummy_dist, param)) + + assert "param1" in new_dist._fixed_params + assert new_dist._fixed_params == dummy_dist._fixed_params + + assert new_dist._fixed_params is not dummy_dist._fixed_params + class TestContinuousDistributionCopying: """Tests the __copy__ method implementation for ContinuousDistribution.""" @@ -299,8 +378,8 @@ def test_neq_different_params(self): def test_neq_different_type(self): """Tests that two instances with different types are not equal.""" - d1 = DummyDistribution(param1=1.0, param2=2.0) - d2 = DummyDistribution(param1=1.0, param2=2.0, name="Dummy2") + d1 = Normal(loc=0.0, scale=1.0) + d2 = Exponential(loc=0.0, rate=1.0) assert d1 != d2 def test_neq_other_object(self): @@ -310,6 +389,13 @@ def test_neq_other_object(self): other = "not_a_distribution" assert d1 != other + def test_neq_different_dtype(self): + """Tests that two instances with different dtypes are not equal.""" + + d1 = DummyDistribution(param1=1.0, param2=2.0) + d2 = DummyDistribution(param1=1.0, param2=2.0, dtype=np.float32) + assert d1 != d2 + def test_hash_consistency(self): """Tests that two equal distribution instances have the same hash.""" @@ -333,3 +419,70 @@ def test_hash_inequality_name(self): d2 = DummyDistribution(param1=1.0, param2=2.0, name="Dummy2") assert d1 != d2 assert hash(d1) != hash(d2) + + def 
test_hash_inequality_dtype(self): + """Tests that two non-equal type distribution instances have different hashes.""" + + d1 = DummyDistribution(param1=1.0, param2=2.0) + d2 = DummyDistribution(param1=1.0, param2=2.0, dtype=np.float32) + assert d1 != d2 + assert hash(d1) != hash(d2) + + +class DTypeHandlingMixin: + """A test mixin to verify correct dtype handling in all subclasses of ContinuousDistribution.""" + + distribution_class: ClassVar[type[ContinuousDistribution] | None] = None + default_params: ClassVar[dict] = {} + + # Tests initialization + # -------------------- + + def check_init_with_dtype_sets_correct_types(self, dtype): + """Tests that the constructor and the Parameter descriptor correctly cast parameter types.""" + + dist = self.distribution_class(**self.default_params, dtype=dtype) + + assert dist.dtype == dtype + + for param_name in self.default_params: + param_value = getattr(dist, param_name) + assert isinstance(param_value, dtype) + + # Tests generate + # -------------------- + + def check_generate_returns_correct_dtype(self, size, dtype): + """Tests the dtype of the array returned by the generate method.""" + dist = self.distribution_class(**self.default_params, dtype=dtype) + + result = dist.generate(size=size) + + assert isinstance(result, np.ndarray) + assert result.dtype == dtype + + # Tests calculations + # -------------------- + + def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): + """Helper method with the logic for testing methods that take X.""" + dist = self.distribution_class(**self.default_params, dtype=dtype) + method_to_test = getattr(dist, method_name) + result = method_to_test(x_data) + assert isinstance(result, np.ndarray) + assert result.dtype == dtype + + def check_ppf_returns_correct_dtype(self, p_data, dtype): + """Helper method with the logic for testing the ppf method.""" + dist = self.distribution_class(**self.default_params, dtype=dtype) + result = dist.ppf(p_data) + assert 
isinstance(result, np.ndarray) + assert result.dtype == dtype + + def check_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + """Tests that each partial derivative method (_dlog_*) returns a NumPy array with the correct dtype.""" + + dist = self.distribution_class(**self.default_params, dtype=dtype) + method = getattr(dist, method_name) + + assert method(x_data).dtype == dtype diff --git a/rework_tests/unit/distributions/test_exponential.py b/rework_tests/unit/distributions/test_exponential.py index a876fb48..41e15161 100644 --- a/rework_tests/unit/distributions/test_exponential.py +++ b/rework_tests/unit/distributions/test_exponential.py @@ -6,6 +6,7 @@ import random +from typing import ClassVar import numpy as np import pytest @@ -13,9 +14,12 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Exponential +from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import expon, kstest +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + st_rate = st.floats(min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False) st_loc = st.floats(min_value=-1e3, max_value=1e3, allow_nan=False, allow_infinity=False) @@ -90,6 +94,8 @@ def test_pdf_properties(self, loc, rate, x): def test_pdf_against_scipy(self, loc, rate, x): """Compares the custom PDF implementation against scipy's implementation.""" + assume(x > loc) + dist = Exponential(loc=loc, rate=rate) custom_pdf = dist.pdf(x) scipy_pdf = expon.pdf(x, loc=loc, scale=1 / rate) @@ -100,7 +106,7 @@ def test_pdf_integral_is_one(self, loc, rate): """Tests that the integral of the PDF over its support is equal to 1.""" dist = Exponential(loc=loc, rate=rate) - integral, error = quad(dist.pdf, loc, np.inf) + integral, error = quad(lambda x: dist.pdf(x).item(), loc, np.inf) np.testing.assert_allclose(1.0, integral) @given(loc=st_loc, 
rate=st_rate, x=st.floats(max_value=-1e6, allow_infinity=False)) @@ -128,6 +134,8 @@ def test_lpdf_return_type_and_shape(self, loc, rate, x): def test_lpdf_against_scipy(self, loc, rate, x): """Compares the custom LPDF implementation against scipy's implementation.""" + assume(x > loc) + dist = Exponential(loc=loc, rate=rate) custom_lpdf = dist.lpdf(x) scipy_lpdf = expon.logpdf(x, loc=loc, scale=1 / rate) @@ -300,3 +308,30 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "expon", args=(loc, 1 / rate)) lower_bound = 0.05 assert p_value > lower_bound + + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) +class TestExponentialDType(DTypeHandlingMixin): + distribution_class = Exponential + default_params: ClassVar[dict] = {"loc": 0.0, "rate": 1.0} + + def test_init_with_dtype_sets_correct_types(self, dtype): + self.check_init_with_dtype_sets_correct_types(dtype) + + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_correct_dtype(self, size, dtype): + self.check_generate_returns_correct_dtype(size, dtype) + + @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e3))) + def test_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): + self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) + + @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) + def test_ppf_returns_correct_dtype(self, p_data, dtype): + self.check_ppf_returns_correct_dtype(p_data, dtype) + + @pytest.mark.parametrize("method_name", ["_dlog_loc", "_dlog_rate"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e6))) + def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_normal.py
b/rework_tests/unit/distributions/test_normal.py index 99a86867..c22864d8 100644 --- a/rework_tests/unit/distributions/test_normal.py +++ b/rework_tests/unit/distributions/test_normal.py @@ -5,6 +5,7 @@ __license__ = "SPDX-License-Identifier: MIT" import random +from typing import ClassVar import numpy as np import pytest @@ -12,9 +13,12 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Normal +from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import kstest, norm +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + # Strategies for hypothesis st_loc = st.floats(min_value=-1e3, max_value=1e3, allow_nan=False, allow_infinity=False) st_scale = st.floats(min_value=0.01, max_value=1e3, allow_nan=False, allow_infinity=False) @@ -91,7 +95,7 @@ def test_pdf_integral_is_one(self, loc, scale): """Tests that the integral of the PDF over its support is equal to 1.""" dist = Normal(loc=loc, scale=scale) - integral, error = quad(dist.pdf, loc - scale * 6, loc + scale * 6) + integral, error = quad(lambda x: dist.pdf(x).item(), loc - scale * 6, loc + scale * 6) np.testing.assert_allclose(1.0, integral, atol=1e-7) @@ -228,3 +232,30 @@ def test_generate_kolmogorov_smirnov(self): samples = dist.generate(size=size) ks_statistic, p_value = kstest(samples, "norm", args=(loc, scale)) assert p_value > expected_p_value + + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) +class TestNormalDType(DTypeHandlingMixin): + distribution_class = Normal + default_params: ClassVar[dict] = {"loc": 0.0, "scale": 1.0} + + def test_init_with_dtype_sets_correct_types(self, dtype): + self.check_init_with_dtype_sets_correct_types(dtype) + + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_correct_dtype(self, size, dtype): + self.check_generate_returns_correct_dtype(size, dtype) + + 
@pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e3, 1e3))) + def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): + self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) + + @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) + def check_ppf_returns_correct_dtype(self, p_data, dtype): + self.check_ppf_returns_correct_dtype(p_data, dtype) + + @pytest.mark.parametrize("method_name", ["_dlog_loc", "_dlog_scale"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) + def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_pareto.py b/rework_tests/unit/distributions/test_pareto.py index c5ae31e2..97886f16 100644 --- a/rework_tests/unit/distributions/test_pareto.py +++ b/rework_tests/unit/distributions/test_pareto.py @@ -7,6 +7,7 @@ import random from pathlib import Path +from typing import ClassVar import numpy as np import pandas as pd @@ -15,9 +16,12 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions.pareto import Pareto +from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import kstest, pareto +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + st_shape = st.floats(min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False) st_scale = st.floats(min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False) @@ -138,7 +142,7 @@ def test_pdf_integral_is_one(self, shape, scale): """Tests that the integral of the PDF over its support is equal to 1.""" dist = Pareto(shape=shape, scale=scale) - integral, error = 
quad(dist.pdf, scale, np.inf, epsabs=1e-10, epsrel=1e-10, limit=100) + integral, error = quad(lambda x: dist.pdf(x).item(), scale, np.inf, epsabs=1e-10, epsrel=1e-10, limit=100) assert np.isfinite(integral), f"Integral diverged: {integral}" np.testing.assert_allclose(1.0, integral, rtol=1e-8, atol=1e-10) @@ -180,7 +184,7 @@ def test_lpdf_against_scipy(self, shape, scale, x): dist = Pareto(shape=shape, scale=scale) custom_lpdf = dist.lpdf(x) scipy_lpdf = pareto.logpdf(x, scale=scale, b=shape, loc=0.0) - np.testing.assert_allclose(custom_lpdf, scipy_lpdf, atol=1e-12) + np.testing.assert_allclose(custom_lpdf, scipy_lpdf, atol=1e-12, rtol=1e-3) @given(shape=st_shape, scale=st_scale, x=st.floats(min_value=1e2, max_value=1e4, allow_infinity=False)) def test_lpdf_outside_support(self, shape, scale, x): @@ -358,3 +362,30 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "pareto", args=(shape, loc, scale)) lower_bound = 0.05 assert p_value > lower_bound + + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) +class TestParetoDType(DTypeHandlingMixin): + distribution_class = Pareto + default_params: ClassVar[dict] = {"shape": 1.0, "scale": 2.0} + + def test_init_with_dtype_sets_correct_types(self, dtype): + self.check_init_with_dtype_sets_correct_types(dtype) + + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_correct_dtype(self, size, dtype): + self.check_generate_returns_correct_dtype(size, dtype) + + @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(2, 1e3))) + def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): + self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) + + @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) + def check_ppf_returns_correct_dtype(self, p_data, dtype): + 
self.check_ppf_returns_correct_dtype(p_data, dtype) + + @pytest.mark.parametrize("method_name", ["_dlog_shape", "_dlog_scale"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(2, 1e6))) + def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_uniform.py b/rework_tests/unit/distributions/test_uniform.py index 46d35648..0aa61ef2 100644 --- a/rework_tests/unit/distributions/test_uniform.py +++ b/rework_tests/unit/distributions/test_uniform.py @@ -5,6 +5,7 @@ __license__ = "SPDX-License-Identifier: MIT" import random +from typing import ClassVar import numpy as np import pytest @@ -12,9 +13,12 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Uniform +from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import kstest, uniform +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + @st.composite def st_valid_border(draw): @@ -106,7 +110,7 @@ def test_pdf_integral_is_one(self, borders): """Tests that the integral of the PDF over its support is equal to 1.""" left_border, right_border = borders dist = Uniform(left_border=left_border, right_border=right_border) - integral, error = quad(dist.pdf, left_border, right_border) + integral, error = quad(lambda x: dist.pdf(x).item(), left_border, right_border) np.testing.assert_allclose(1.0, integral) @given(borders=st_valid_border(), x=st.floats(max_value=-1e9, allow_infinity=False)) @@ -314,3 +318,30 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "uniform", args=(left_border, right_border - left_border)) lower_bound = 0.05 assert p_value > lower_bound + + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) +class TestUniformDType(DTypeHandlingMixin): 
+ distribution_class = Uniform + default_params: ClassVar[dict] = {"left_border": 0.0, "right_border": 1.0} + + def test_init_with_dtype_sets_correct_types(self, dtype): + self.check_init_with_dtype_sets_correct_types(dtype) + + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_correct_dtype(self, size, dtype): + self.check_generate_returns_correct_dtype(size, dtype) + + @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e3, 1e3))) + def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): + self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) + + @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) + def check_ppf_returns_correct_dtype(self, p_data, dtype): + self.check_ppf_returns_correct_dtype(p_data, dtype) + + @pytest.mark.parametrize("method_name", ["_dlog_left_border", "_dlog_right_border"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) + def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_weibull.py b/rework_tests/unit/distributions/test_weibull.py index d992775b..220f9f39 100644 --- a/rework_tests/unit/distributions/test_weibull.py +++ b/rework_tests/unit/distributions/test_weibull.py @@ -6,6 +6,7 @@ import random +from typing import ClassVar import numpy as np import pytest @@ -13,10 +14,13 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Weibull +from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.special import gamma from scipy.stats import kstest, weibull_min +DTYPES_TO_TEST = [np.float16, 
np.float32, np.float64] + # Strategies for generating valid Weibull parameters st_shape = st.floats(min_value=0.5, max_value=10, allow_nan=False, allow_infinity=False) st_loc = st.floats(min_value=-5, max_value=5, allow_nan=False, allow_infinity=False) @@ -96,6 +100,7 @@ def test_pdf_properties(self, shape, loc, scale, x): @given(shape=st_shape, loc=st_loc, scale=st_scale, x=st.floats(1e-6, 1e6)) def test_pdf_against_scipy(self, shape, loc, scale, x): """Compares the custom PDF implementation against scipy's implementation.""" + assume(x > loc) dist = Weibull(shape=shape, loc=loc, scale=scale) custom_pdf = dist.pdf(x) @@ -107,7 +112,7 @@ def test_pdf_integral_is_one(self, shape, loc, scale): """Tests that the integral of the PDF over its support is equal to 1.""" dist = Weibull(shape=shape, loc=loc, scale=scale) - integral, error = quad(dist.pdf, loc, np.inf) + integral, error = quad(lambda x: dist.pdf(x).item(), loc, np.inf) np.testing.assert_allclose(1.0, integral, atol=1e-6) @given(shape=st_shape, loc=st_loc, scale=st_scale, x=st.floats(max_value=-1e6, allow_infinity=False)) @@ -335,3 +340,30 @@ def test_generate_kolmogorov_smirnov(self): # args for scipy's weibull_min are (shape, loc, scale) ks_statistic, p_value = kstest(samples, "weibull_min", args=(shape, loc, scale)) assert p_value > expected_p_value + + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) +class TestWeibullDType(DTypeHandlingMixin): + distribution_class = Weibull + default_params: ClassVar[dict] = {"shape": 2.0, "loc": 0.0, "scale": 1.0} + + def test_init_with_dtype_sets_correct_types(self, dtype): + self.check_init_with_dtype_sets_correct_types(dtype) + + @pytest.mark.parametrize("size", [0, 10]) + def test_generate_returns_correct_dtype(self, size, dtype): + self.check_generate_returns_correct_dtype(size, dtype) + + @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e3))) + def 
check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): + self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) + + @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) + def check_ppf_returns_correct_dtype(self, p_data, dtype): + self.check_ppf_returns_correct_dtype(p_data, dtype) + + @pytest.mark.parametrize("method_name", ["_dlog_shape", "_dlog_loc", "_dlog_scale"]) + @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e6))) + def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): + self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/estimators/iterative/pruners/test_prior_pruner.py b/rework_tests/unit/estimators/iterative/pruners/test_prior_pruner.py index fb4fc61b..2c877397 100644 --- a/rework_tests/unit/estimators/iterative/pruners/test_prior_pruner.py +++ b/rework_tests/unit/estimators/iterative/pruners/test_prior_pruner.py @@ -19,8 +19,8 @@ class DummyDistribution(ContinuousDistribution): """A simple mock implementation of ContinuousDistribution for testing purposes.""" - def __init__(self, name: str): - super().__init__() + def __init__(self, name: str, dtype: np.floating = np.float64): + super().__init__(dtype=dtype) self._name = name @property From 6a0d02c3d9aa68d9ada45d14509151ae73b600b0 Mon Sep 17 00:00:00 2001 From: xImoZA Date: Wed, 5 Nov 2025 18:20:20 +0300 Subject: [PATCH 2/7] fix(distributions): correct type hint for vector in set_params_from_vector --- rework_pysatl_mpest/distributions/continuous_dist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rework_pysatl_mpest/distributions/continuous_dist.py b/rework_pysatl_mpest/distributions/continuous_dist.py index 6f0d19c0..58aca34b 100644 --- a/rework_pysatl_mpest/distributions/continuous_dist.py +++ b/rework_pysatl_mpest/distributions/continuous_dist.py @@ -8,7 +8,7 @@ from abc import 
ABC, abstractmethod from collections.abc import Sequence -from typing import Generic +from typing import Generic, Union import numpy as np from numpy.typing import ArrayLike, NDArray @@ -165,7 +165,7 @@ def get_params_vector(self, param_names: Sequence[str]) -> list[DType]: return [getattr(self, name) for name in param_names] - def set_params_from_vector(self, param_names: Sequence[str], vector: Sequence[float]): + def set_params_from_vector(self, param_names: Sequence[str], vector: Sequence[Union[float, DType]]): """Sets parameter values from a sequence of floats. Updates the distribution's parameters using values from the provided From 16f48d50d665d876e0f77772fe06a3eecb5a9264 Mon Sep 17 00:00:00 2001 From: xImoZA Date: Tue, 11 Nov 2025 14:49:43 +0300 Subject: [PATCH 3/7] chore(tests): correct deprecated method name --- rework_pysatl_mpest/core/mixture.py | 4 ++-- rework_tests/unit/core/test_mixture.py | 6 +++--- .../distributions/test_continuous_distribution.py | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rework_pysatl_mpest/core/mixture.py b/rework_pysatl_mpest/core/mixture.py index 26e2c709..eeecdfbd 100644 --- a/rework_pysatl_mpest/core/mixture.py +++ b/rework_pysatl_mpest/core/mixture.py @@ -100,7 +100,7 @@ def __init__( self._validate_weights(n_components, weights) self._components = [comp.astype(self.dtype) for comp in components] - self._log_weights = np.log(weights + self.dtype(1e-30)) + self._log_weights = np.log(weights + np.finfo(self.dtype).tiny) self._cached_weights: Optional[NDArray[DType]] = None self._sorted_pairs_cache: Optional[list[tuple[ContinuousDistribution[DType], DType]]] = None @@ -308,7 +308,7 @@ def loglikelihood(self, X: ArrayLike) -> DType: Returns ------- - float + DType The total log-likelihood value. 
""" diff --git a/rework_tests/unit/core/test_mixture.py b/rework_tests/unit/core/test_mixture.py index af5d08da..cf834996 100644 --- a/rework_tests/unit/core/test_mixture.py +++ b/rework_tests/unit/core/test_mixture.py @@ -387,9 +387,9 @@ def test_generate_returns_array_with_correct_dtype(self, size): target_dtype = np.float32 mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) - empty_array = mixture.generate(size=size) - assert empty_array.shape == (size,) - assert empty_array.dtype == target_dtype + samples = mixture.generate(size=size) + assert samples.shape == (size,) + assert samples.dtype == target_dtype class TestMixtureModelDunderMethods: diff --git a/rework_tests/unit/distributions/test_continuous_distribution.py b/rework_tests/unit/distributions/test_continuous_distribution.py index 06e3eb3f..dd096e0f 100644 --- a/rework_tests/unit/distributions/test_continuous_distribution.py +++ b/rework_tests/unit/distributions/test_continuous_distribution.py @@ -257,12 +257,12 @@ def test_set_params_from_vector_correct_dtype( param_value = getattr(dummy_float32_dist, param_name) assert isinstance(param_value, np.float32) - # Test to_dtype method + # Test astype method # ---------------------------------- - def test_to_dtype_successful_conversion(self, dummy_dist: DummyDistribution): + def test_astype_successful_conversion(self, dummy_dist: DummyDistribution): """ - Tests that _to_dtype creates a new instance with the correct new dtype + Tests that astype creates a new instance with the correct new dtype and that the original instance remains unchanged. 
""" assert dummy_dist.dtype == np.float64 @@ -285,9 +285,9 @@ def test_to_dtype_successful_conversion(self, dummy_dist: DummyDistribution): for param in dummy_dist.params: assert isinstance(getattr(dummy_dist, param), np.float64) - def test_to_dtype_returns_self_if_same_dtype(self, dummy_dist: DummyDistribution): + def test_astype_returns_self_if_same_dtype(self, dummy_dist: DummyDistribution): """ - Tests that _to_dtype returns the same instance if the target dtype + Tests that astype returns the same instance if the target dtype is identical to the current one, avoiding unnecessary copying. """ assert dummy_dist.dtype == np.float64 @@ -296,7 +296,7 @@ def test_to_dtype_returns_self_if_same_dtype(self, dummy_dist: DummyDistribution assert same_dtype_dist is dummy_dist - def test_to_dtype_preserves_fixed_params(self, dummy_dist: DummyDistribution): + def test_astype_preserves_fixed_params(self, dummy_dist: DummyDistribution): """ Tests that the set of fixed parameters is correctly copied to the new instance after dtype conversion. 
From 6cd6a8a3f098894a24c44d03370aec87ad71be5e Mon Sep 17 00:00:00 2001 From: xImoZA Date: Thu, 13 Nov 2025 19:47:01 +0300 Subject: [PATCH 4/7] chore(tests): standardize distribution tests for dtype and repr validation --- rework_pysatl_mpest/distributions/beta.py | 5 +- rework_pysatl_mpest/distributions/cauchy.py | 4 +- .../distributions/exponential.py | 4 +- rework_pysatl_mpest/distributions/normal.py | 4 +- rework_pysatl_mpest/distributions/pareto.py | 4 +- rework_pysatl_mpest/distributions/uniform.py | 7 +- rework_pysatl_mpest/distributions/weibull.py | 7 +- rework_tests/unit/distributions/test_beta.py | 234 +++++++++--------- .../unit/distributions/test_cauchy.py | 146 +++++------ .../test_continuous_distribution.py | 60 ----- .../unit/distributions/test_exponential.py | 155 ++++++------ .../unit/distributions/test_normal.py | 168 +++++++------ .../unit/distributions/test_pareto.py | 167 ++++++------- .../unit/distributions/test_uniform.py | 199 ++++++++------- .../unit/distributions/test_weibull.py | 185 +++++++------- 15 files changed, 640 insertions(+), 709 deletions(-) diff --git a/rework_pysatl_mpest/distributions/beta.py b/rework_pysatl_mpest/distributions/beta.py index 7c42d150..a0eb41a9 100644 --- a/rework_pysatl_mpest/distributions/beta.py +++ b/rework_pysatl_mpest/distributions/beta.py @@ -408,7 +408,7 @@ def __repr__(self) -> str: ------- str A string that can be used to recreate the object, e.g., - "Beta(alpha=1.0, beta=2.0, left_border=0.0, right_border=1.0)". + "Beta(alpha=1.0, beta=2.0, left_border=0.0, right_border=1.0, dtype=np.float64)". 
""" return ( @@ -416,5 +416,6 @@ def __repr__(self) -> str: f"alpha={self.alpha}, " f"beta={self.beta}, " f"left_border={self.left_border}, " - f"right_border={self.right_border})" + f"right_border={self.right_border}, " + f"dtype=np.{self.dtype.__name__})" ) diff --git a/rework_pysatl_mpest/distributions/cauchy.py b/rework_pysatl_mpest/distributions/cauchy.py index a0ebcbcf..ba487587 100644 --- a/rework_pysatl_mpest/distributions/cauchy.py +++ b/rework_pysatl_mpest/distributions/cauchy.py @@ -270,7 +270,7 @@ def __repr__(self) -> str: ------- str A string that can be used to recreate the object, e.g., - "Cauchy(loc=0.0, scale=2.0)". + "Cauchy(loc=0.0, scale=2.0, dtype=np.float64)". """ - return f"{self.__class__.__name__}(loc={self.loc}, scale={self.scale})" + return f"{self.__class__.__name__}(loc={self.loc}, scale={self.scale}, dtype=np.{self.dtype.__name__})" diff --git a/rework_pysatl_mpest/distributions/exponential.py b/rework_pysatl_mpest/distributions/exponential.py index 176177dd..772ba00c 100644 --- a/rework_pysatl_mpest/distributions/exponential.py +++ b/rework_pysatl_mpest/distributions/exponential.py @@ -252,7 +252,7 @@ def __repr__(self) -> str: ------- str A string that can be used to recreate the object, e.g., - "Exponential(loc=0.0, rate=2.0)". + "Exponential(loc=0.0, rate=2.0, dtype=np.float64)". """ - return f"{self.__class__.__name__}(loc={self.loc}, rate={self.rate})" + return f"{self.__class__.__name__}(loc={self.loc}, rate={self.rate}, dtype=np.{self.dtype.__name__})" diff --git a/rework_pysatl_mpest/distributions/normal.py b/rework_pysatl_mpest/distributions/normal.py index 2460fb71..30da38d3 100644 --- a/rework_pysatl_mpest/distributions/normal.py +++ b/rework_pysatl_mpest/distributions/normal.py @@ -215,7 +215,7 @@ def __repr__(self) -> str: ------- str A string that can be used to recreate the object, e.g., - "Normal(loc=0.0, scale=1.0)". + "Normal(loc=0.0, scale=1.0, dtype=np.float64)". 
""" - return f"{self.__class__.__name__}(loc={self.loc}, scale={self.scale})" + return f"{self.__class__.__name__}(loc={self.loc}, scale={self.scale}, dtype=np.{self.dtype.__name__})" diff --git a/rework_pysatl_mpest/distributions/pareto.py b/rework_pysatl_mpest/distributions/pareto.py index e9ed9866..2c9c696a 100644 --- a/rework_pysatl_mpest/distributions/pareto.py +++ b/rework_pysatl_mpest/distributions/pareto.py @@ -257,7 +257,7 @@ def __repr__(self) -> str: ------- str A string that can be used to recreate the object, e.g., - "Pareto(shape=0.0, scale=2.0)". + "Pareto(shape=0.0, scale=2.0, dtype=np.float64)". """ - return f"{self.__class__.__name__}(shape={self.shape}, scale={self.scale})" + return f"{self.__class__.__name__}(shape={self.shape}, scale={self.scale}, dtype=np.{self.dtype.__name__})" diff --git a/rework_pysatl_mpest/distributions/uniform.py b/rework_pysatl_mpest/distributions/uniform.py index d4b0aec9..75f80ebb 100644 --- a/rework_pysatl_mpest/distributions/uniform.py +++ b/rework_pysatl_mpest/distributions/uniform.py @@ -262,7 +262,10 @@ def __repr__(self) -> str: ------- str A string that can be used to recreate the object, e.g., - "Uniform(left_border=0.0, right_border=2.0)". + "Uniform(left_border=0.0, right_border=2.0, dtype=np.float64)". """ - return f"{self.__class__.__name__}(left_border={self.left_border}, right_border={self.right_border})" + return ( + f"{self.__class__.__name__}(left_border={self.left_border}, " + f"right_border={self.right_border}, dtype=np.{self.dtype.__name__})" + ) diff --git a/rework_pysatl_mpest/distributions/weibull.py b/rework_pysatl_mpest/distributions/weibull.py index 2e966b97..e06e4490 100644 --- a/rework_pysatl_mpest/distributions/weibull.py +++ b/rework_pysatl_mpest/distributions/weibull.py @@ -256,7 +256,10 @@ def __repr__(self) -> str: ------- str A string that can be used to recreate the object, e.g., - "Weibull(shape=2.0, loc=0.0, scale=1.0)". + "Weibull(shape=2.0, loc=0.0, scale=1.0, dtype=np.float64)". 
""" - return f"{self.__class__.__name__}(shape={self.shape}, loc={self.loc}, scale={self.scale})" + return ( + f"{self.__class__.__name__}(shape={self.shape}, " + f"loc={self.loc}, scale={self.scale}, dtype=np.{self.dtype.__name__})" + ) diff --git a/rework_tests/unit/distributions/test_beta.py b/rework_tests/unit/distributions/test_beta.py index b185e3e8..ace54c4e 100644 --- a/rework_tests/unit/distributions/test_beta.py +++ b/rework_tests/unit/distributions/test_beta.py @@ -7,7 +7,6 @@ import random from pathlib import Path -from typing import ClassVar import numpy as np import pandas as pd @@ -16,7 +15,6 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Beta -from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import beta, kstest @@ -72,82 +70,87 @@ def st_params_and_x_for_grad(draw): return (shape1, shape2, left, right, x) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestBetaInitialization: """Tests for the __init__ method and basic properties.""" - def test_initialization_successful(self): + def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid parameters.""" shape1, shape2, left_border, right_border = 0.5, 2.0, -1.0, 1.0 - dist = Beta(alpha=shape1, beta=shape2, left_border=left_border, right_border=right_border) - assert isinstance(dist.alpha, float) - assert isinstance(dist.beta, float) - assert isinstance(dist.left_border, float) - assert isinstance(dist.right_border, float) - assert dist.alpha == shape1 - assert dist.beta == shape2 - assert dist.left_border == left_border - assert dist.right_border == right_border - - def test_name_property(self): + dist = Beta(alpha=shape1, beta=shape2, lower_bound=left_border, upper_bound=right_border, dtype=dtype) + assert dist.alpha.dtype == dtype + assert dist.beta.dtype == dtype + 
assert dist.left_border.dtype == dtype + assert dist.right_border.dtype == dtype + assert dist.alpha == dtype(shape1) + assert dist.beta == dtype(shape2) + assert dist.left_border == dtype(left_border) + assert dist.right_border == dtype(right_border) + + def test_name_property(self, dtype): """Tests that the name property returns the correct string.""" - dist = Beta(alpha=1.0, beta=2.0, left_border=-1.0, right_border=1.0) + dist = Beta(alpha=1.0, beta=2.0, left_border=-1.0, right_border=1.0, dtype=dtype) assert dist.name == "Beta" - def test_params_property(self): + def test_params_property(self, dtype): """Tests that the params property returns the correct set of parameter names.""" - dist = Beta(alpha=1.0, beta=2.0, left_border=-1.0, right_border=1.0) + dist = Beta(alpha=1.0, beta=2.0, left_border=-1.0, right_border=1.0, dtype=dtype) assert dist.params == {"alpha", "beta", "left_border", "right_border"} - def test_alpha_invariant_violation(self): + def test_alpha_invariant_violation(self, dtype): """Tests that initializing with a non-positive alpha raises a ValueError.""" with pytest.raises(ValueError, match="Alpha parameter should be positive or zero"): - Beta(alpha=-1.0, beta=2.0, left_border=10.0, right_border=20.0) + Beta(alpha=-1.0, beta=2.0, left_border=10.0, right_border=20.0, dtype=dtype) with pytest.raises(ValueError, match="Alpha parameter should be positive or zero"): - Beta(alpha=-20.0, beta=2.0, left_border=10.0, right_border=20.0) + Beta(alpha=-20.0, beta=2.0, left_border=10.0, right_border=20.0, dtype=dtype) - def test_alpha_assignment_violation(self): + def test_alpha_assignment_violation(self, dtype): """Tests that assigning a non-positive rate after initialization raises a ValueError.""" - dist = Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=20.0) + dist = Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=20.0, dtype=dtype) with pytest.raises(ValueError, match="Alpha parameter should be positive or zero"): dist.alpha = -1.0 
with pytest.raises(ValueError, match="Alpha parameter should be positive or zero"): dist.alpha = -10.0 - def test_beta_invariant_violation(self): + def test_beta_invariant_violation(self, dtype): """Tests that initializing with a non-positive beta raises a ValueError.""" with pytest.raises(ValueError, match="Beta parameter should be positive or zero"): - Beta(alpha=1.0, beta=-2.0, left_border=10.0, right_border=20.0) + Beta(alpha=1.0, beta=-2.0, left_border=10.0, right_border=20.0, dtype=dtype) with pytest.raises(ValueError, match="Beta parameter should be positive or zero"): - Beta(alpha=1.0, beta=-20.0, left_border=10.0, right_border=20.0) + Beta(alpha=1.0, beta=-20.0, left_border=10.0, right_border=20.0, dtype=dtype) - def test_beta_assignment_violation(self): + def test_beta_assignment_violation(self, dtype): """Tests that assigning a non-positive beta after initialization raises a ValueError.""" - dist = Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=20.0) + dist = Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=20.0, dtype=dtype) with pytest.raises(ValueError, match="Beta parameter should be positive or zero"): dist.beta = -1.0 with pytest.raises(ValueError, match="Beta parameter should be positive or zero"): dist.beta = -10.0 - def test_invariant_bounds_violation(self): - """Tests that initializing with a lower bound bigger upper bound raises a ValueError.""" + def test_invariant_bounds_violation(self, dtype): + """Tests that initializing with a left border bigger right border raises a ValueError.""" + with pytest.raises(ValueError, match="Left border must be less than right border"): - Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=5.0) + Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=5.0, dtype=dtype) with pytest.raises(ValueError, match="Left border must be less than right border"): - Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=10.0) + Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=10.0, 
dtype=dtype) - def test_repr_method(self): + def test_repr_method(self, dtype): """Tests that the __repr__ method provides a reproducible string.""" - dist = Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=20.0) + dist = Beta(alpha=1.1, beta=2.1, lower_bound=10.1, upper_bound=20.1, dtype=dtype) repr_str = repr(dist) - assert repr_str == "Beta(alpha=1.0, beta=2.0, left_border=10.0, right_border=20.0)" + assert ( + repr_str == f"Beta(alpha={dist.alpha}, beta={dist.beta}, left_border={dist.left_border}, " + f"right_border={dist.right_border}, dtype=np.{dtype.__name__})" + ) recreated_dist = eval(repr_str) assert dist == recreated_dist @@ -156,19 +159,23 @@ def test_repr_method(self): class TestBetaPDF: """Tests for the pdf method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_pdf_properties(self, x): + def test_pdf_properties(self, x, dtype): """Tests that the PDF is non-negative and has the correct return type and shape.""" + alpha, beta, left_border, right_border = 1.0, 1.0, 2.9, 10.0 - dist = Beta(alpha, beta, left_border, right_border) + dist = Beta(alpha, beta, left_border, right_border, dtype=dtype) pdf_values = dist.pdf(x) assert isinstance(pdf_values, np.ndarray) + assert pdf_values.dtype == dtype assert pdf_values.shape == x.shape assert np.all(pdf_values >= 0) @pytest.mark.parametrize("x,shape1,shape2,left_border,right_border,expected_pdf", load_r_test_cases()) def test_pdf_against_R(self, x, shape1, shape2, left_border, right_border, expected_pdf): """Compares the custom PDF implementation against scipy's implementation.""" + dist = Beta(shape1, shape2, left_border, right_border) custom_pdf = dist.pdf(x) np.testing.assert_allclose(custom_pdf, expected_pdf, atol=1e-9) @@ -176,6 +183,7 @@ def test_pdf_against_R(self, x, shape1, shape2, left_border, right_border, expec @given(params=st_valid_params()) def test_pdf_integral_is_one(self, 
params): """Tests that the integral of the PDF over its support is equal to 1.""" + shape1, shape2, left_border, right_border = params dist = Beta(shape1, shape2, left_border, right_border) integral, error = quad(lambda x: dist.pdf(x).item(), left_border, right_border) @@ -184,6 +192,7 @@ def test_pdf_integral_is_one(self, params): @given(x=st.floats(min_value=1e-4, max_value=1e2, allow_infinity=False)) def test_pdf_outside_support(self, x): """Tests that the PDF is zero for values less than the location parameter.""" + x_val = 2.0 - abs(x) dist = Beta(1.0, 1.0, 2.0, 10.0) assert dist.pdf(x_val) == 0.0 @@ -192,13 +201,16 @@ def test_pdf_outside_support(self, x): class TestBetaLPDF: """Tests for the lpdf (log-PDF) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(params=st_valid_params(), x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_lpdf_return_type_and_shape(self, params, x): + def test_lpdf_return_type_and_shape(self, params, x, dtype): """Tests the return type and shape of the lpdf method.""" + shape1, shape2, left_border, right_border = params - dist = Beta(shape1, shape2, left_border, right_border) + dist = Beta(shape1, shape2, left_border, right_border, dtype=dtype) lpdf_values = dist.lpdf(x) assert isinstance(lpdf_values, np.ndarray) + assert lpdf_values.dtype == dtype assert lpdf_values.shape == x.shape @given(params=st_valid_params(), x=st.floats(1e-6, 1e6)) @@ -218,6 +230,7 @@ def test_lpdf_against_scipy(self, params, x): @given(params=st_valid_params(), x=st.floats(max_value=-1e6, allow_infinity=False)) def test_lpdf_outside_support(self, params, x): """Tests that the LPDF is -inf for values less than the location parameter.""" + shape1, shape2, left_border, right_border = params x_val = left_border - abs(x) dist = Beta(shape1, shape2, left_border, right_border) @@ -227,20 +240,24 @@ def test_lpdf_outside_support(self, params, x): class TestBetaPPF: """Tests for the ppf (Percent 
Point Function) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( params=st_valid_params(), p=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True)) ) - def test_ppf_return_type_and_shape(self, params, p): + def test_ppf_return_type_and_shape(self, params, p, dtype): """Tests the return type and shape of the ppf method.""" + shape1, shape2, left_border, right_border = params - dist = Beta(shape1, shape2, left_border, right_border) + dist = Beta(shape1, shape2, left_border, right_border, dtype=dtype) ppf_values = dist.ppf(p) assert isinstance(ppf_values, np.ndarray) + assert ppf_values.dtype == dtype assert ppf_values.shape == p.shape @given(params=st_valid_params(), p=st.floats(0, 1)) def test_ppf_against_scipy(self, params, p): """Compares the custom PPF implementation against scipy's implementation.""" + shape1, shape2, left_border, right_border = params dist = Beta(shape1, shape2, left_border, right_border) custom_ppf = dist.ppf(p) @@ -255,78 +272,88 @@ def test_ppf_invalid_input(self, p_val): assert np.isnan(dist.ppf(p_val)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestBetaGradients: """Tests for gradient calculation methods.""" h = 1e-6 @given(params_x=st_params_and_x_for_grad()) - def test_dlog_shape1_numerical(self, params_x): + def test_dlog_shape1_numerical(self, params_x, dtype): """Checks the analytical gradient for 'shape1' against a numerical approximation.""" - shape1, shape2, left_border, right_border, x = params_x - dist = Beta(shape1, shape2, left_border, right_border) - - lpdf_plus_h = Beta(shape1 + self.h, shape2, left_border, right_border).lpdf(x) - lpdf_minus_h = Beta(shape1 - self.h, shape2, left_border, right_border).lpdf(x) + shape1, shape2, left_border, right_border, x = params_x - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Beta(shape1, shape2, left_border, right_border, dtype=dtype) analytical_grad = dist._dlog_alpha(x) assert 
isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Beta(shape1 + self.h, shape2, left_border, right_border).lpdf(x) + lpdf_minus_h = Beta(shape1 - self.h, shape2, left_border, right_border).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given(params_x=st_params_and_x_for_grad()) - def test_dlog_shape2_numerical(self, params_x): + def test_dlog_shape2_numerical(self, params_x, dtype): """Checks the analytical gradient for 'shape2' against a numerical approximation.""" shape1, shape2, left_border, right_border, x = params_x - dist = Beta(shape1, shape2, left_border, right_border) - - lpdf_plus_h = Beta(shape1, shape2 + self.h, left_border, right_border).lpdf(x) - lpdf_minus_h = Beta(shape1, shape2 - self.h, left_border, right_border).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Beta(shape1, shape2, left_border, right_border, dtype=dtype) analytical_grad = dist._dlog_beta(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Beta(shape1, shape2 + self.h, left_border, right_border).lpdf(x) + lpdf_minus_h = Beta(shape1, shape2 - self.h, left_border, right_border).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given(params_x=st_params_and_x_for_grad()) - def test_dlog_left_border_numerical(self, params_x): + def test_dlog_left_border_numerical(self, params_x, dtype): """Checks the analytical gradient for 'left_border' 
against a numerical approximation.""" shape1, shape2, left_border, right_border, x = params_x - dist = Beta(shape1, shape2, left_border, right_border) - - lpdf_plus_h = Beta(shape1, shape2, left_border + self.h, right_border).lpdf(x) - lpdf_minus_h = Beta(shape1, shape2, left_border - self.h, right_border).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Beta(shape1, shape2, left_border, right_border, dtype=dtype) analytical_grad = dist._dlog_left_border(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Beta(shape1, shape2, left_border + self.h, right_border).lpdf(x) + lpdf_minus_h = Beta(shape1, shape2, left_border - self.h, right_border).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given(params_x=st_params_and_x_for_grad()) - def test_dlog_right_border_numerical(self, params_x): + def test_dlog_right_border_numerical(self, params_x, dtype): """Checks the analytical gradient for 'right_border' against a numerical approximation.""" shape1, shape2, left_border, right_border, x = params_x - dist = Beta(shape1, shape2, left_border, right_border) - - lpdf_plus_h = Beta(shape1, shape2, left_border, right_border + self.h).lpdf(x) - lpdf_minus_h = Beta(shape1, shape2, left_border, right_border - self.h).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Beta(shape1, shape2, left_border, right_border, dtype=dtype) analytical_grad = dist._dlog_right_border(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + if dtype == np.float64: + 
lpdf_plus_h = Beta(shape1, shape2, left_border, right_border + self.h).lpdf(x) + lpdf_minus_h = Beta(shape1, shape2, left_border, right_border - self.h).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @pytest.mark.parametrize( "fixed_params, expected_shape_col, expected_params", @@ -338,10 +365,10 @@ def test_dlog_right_border_numerical(self, params_x): (["alpha", "beta", "left_border", "right_border"], 0, []), ], ) - def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params): + def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params, dtype): """Tests the structure and content of log_gradients with various fixed parameters.""" - dist = Beta(1.0, 1.0, 1.0, 10.0) + dist = Beta(1.0, 1.0, 1.0, 10.0, dtype=dtype) for param in fixed_params: dist.fix_param(param) @@ -349,6 +376,7 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte gradients = dist.log_gradients(x) assert isinstance(gradients, np.ndarray) + assert gradients.dtype == dtype assert gradients.shape == (len(x), expected_shape_col) if "alpha" in expected_params: @@ -365,43 +393,40 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte np.testing.assert_allclose(gradients[:, idx], dist._dlog_right_border(x)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestBetaGenerate: """Tests for the generate method.""" - def test_generate_type_and_shape(self): + def test_generate_type_and_shape(self, dtype): """Tests that generated samples have the correct type and shape.""" np.random.seed(42) - random.seed(42) - dist = Beta(1.0, 1.0, 1.0, 10.0) + dist = Beta(1.0, 1.0, 1.0, 10.0, dtype=dtype) size = 100 samples = dist.generate(size=size) assert isinstance(samples, np.ndarray) - assert samples.dtype == np.float64 + assert samples.dtype == dtype assert samples.shape == (size,) - def 
test_generate_zero_size(self): + def test_generate_zero_size(self, dtype): """Tests if the generating 0 number of samples returns an empty array""" - - dist = Beta(1.0, 1.0, 1.0, 10.0) + dist = Beta(1.0, 1.0, 1.0, 10.0, dtype=dtype) assert len(dist.generate(size=0)) == 0 @pytest.mark.parametrize("size", [-1, -10]) - def test_generate_negative_size(self, size): + def test_generate_negative_size(self, size, dtype): """Tests that generating a negative number of samples raises ValueError.""" - - dist = Beta(1.0, 1.0, 1.0, 10.0) - + dist = Beta(1.0, 1.0, 1.0, 10.0, dtype=dtype) with pytest.raises(ValueError): dist.generate(size=size) - def test_generate_statistical_properties(self): + def test_generate_statistical_properties(self, dtype): """Tests if the generated samples have correct statistical properties (mean, variance).""" np.random.seed(123) random.seed(123) shape1, shape2, left_border, right_border = 1.0, 1.0, 1.0, 10.0 - dist = Beta(shape1, shape2, left_border, right_border) + dist = Beta(shape1, shape2, left_border, right_border, dtype=dtype) size = 20000 samples = dist.generate(size=size) @@ -412,16 +437,16 @@ def test_generate_statistical_properties(self): (right_border - left_border) ** 2 * shape1 * shape2 / ((shape1 + shape2) ** 2 * (shape1 + shape2 + 1)) ) - assert np.mean(samples) == pytest.approx(theoretical_mean, rel=0.1) - assert np.var(samples) == pytest.approx(theoretical_var, rel=0.1) + assert np.mean(samples, dtype=np.float64) == pytest.approx(theoretical_mean, rel=0.1) + assert np.var(samples, dtype=np.float64) == pytest.approx(theoretical_var, rel=0.1) - def test_generate_kolmogorov_smirnov(self): + def test_generate_kolmogorov_smirnov(self, dtype): """Performs a Kolmogorov-Smirnov test to check if samples fit the distribution.""" np.random.seed(456) random.seed(456) shape1, shape2, left_border, right_border = 1.0, 1.0, 1.0, 10.0 - dist = Beta(shape1, shape2, left_border, right_border) + dist = Beta(shape1, shape2, left_border, right_border, 
dtype=dtype) size = 1000 samples = dist.generate(size=size) @@ -429,30 +454,3 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "beta", args=(shape1, shape2, left_border, right_border - left_border)) lower_bound = 0.05 assert p_value > lower_bound - - -@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) -class TestBetaDType(DTypeHandlingMixin): - distribution_class = Beta - default_params: ClassVar[dict] = {"alpha": 1.0, "beta": 2.0, "left_border": -1.0, "right_border": 1.0} - - def test_init_with_dtype_sets_correct_types(self, dtype): - self.check_init_with_dtype_sets_correct_types(dtype) - - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_correct_dtype(self, size, dtype): - self.check_generate_returns_correct_dtype(size, dtype) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1, 1))) - def test_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) - - @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) - def test_ppf_returns_correct_dtype(self, p_data, dtype): - self.check_ppf_returns_correct_dtype(p_data, dtype) - - @pytest.mark.parametrize("method_name", ["_dlog_alpha", "_dlog_beta", "_dlog_left_border", "_dlog_right_border"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1, 1))) - def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_cauchy.py b/rework_tests/unit/distributions/test_cauchy.py index e80e91fd..098d5916 100644 --- a/rework_tests/unit/distributions/test_cauchy.py +++ b/rework_tests/unit/distributions/test_cauchy.py @@ -6,7 +6,6 @@ import random -from typing import ClassVar 
import numpy as np import pytest @@ -14,7 +13,6 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Cauchy -from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import cauchy, kstest @@ -24,54 +22,55 @@ st_loc = st.floats(min_value=-1e3, max_value=1e3, allow_nan=False, allow_infinity=False) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestCauchyInitialization: """Tests for the __init__ method and basic properties.""" - def test_initialization_successful(self): + def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid parameters.""" loc, scale = 0.5, 2.0 - dist = Cauchy(loc=loc, scale=scale) - assert isinstance(dist.loc, float) - assert isinstance(dist.scale, float) - assert dist.loc == loc - assert dist.scale == scale + dist = Cauchy(loc=loc, scale=scale, dtype=dtype) + assert dist.loc.dtype == dtype + assert dist.scale.dtype == dtype + assert dist.loc == dtype(loc) + assert dist.scale == dtype(scale) - def test_name_property(self): + def test_name_property(self, dtype): """Tests that the name property returns the correct string.""" - dist = Cauchy(loc=0.0, scale=1.0) + dist = Cauchy(loc=0.0, scale=1.0, dtype=dtype) assert dist.name == "Cauchy" - def test_params_property(self): + def test_params_property(self, dtype): """Tests that the params property returns the correct set of parameter names.""" - dist = Cauchy(loc=0.0, scale=1.0) + dist = Cauchy(loc=0.0, scale=1.0, dtype=dtype) assert dist.params == {"loc", "scale"} - def test_scale_invariant_violation(self): + def test_scale_invariant_violation(self, dtype): """Tests that assigning a non-positive rate after initialization raises a ValueError.""" with pytest.raises(ValueError, match="Scale parameter should be positive"): - Cauchy(loc=0.0, scale=-10.0) + Cauchy(loc=0.0, 
scale=-10.0, dtype=dtype) with pytest.raises(ValueError, match="Scale parameter should be positive"): - Cauchy(loc=0.0, scale=-0.02) + Cauchy(loc=0.0, scale=-0.02, dtype=dtype) - def test_scale_assignment_violation(self): + def test_scale_assignment_violation(self, dtype): """Tests that assigning a non-positive rate after initialization raises a ValueError.""" - dist = Cauchy(loc=0.0, scale=1.0) + dist = Cauchy(loc=0.0, scale=1.0, dtype=dtype) with pytest.raises(ValueError, match="Scale parameter should be positive"): dist.scale = 0.0 with pytest.raises(ValueError, match="Scale parameter should be positive"): dist.scale = -10.0 - def test_repr_method(self): + def test_repr_method(self, dtype): """Tests that the __repr__ method provides a reproducible string.""" - dist = Cauchy(loc=1.23, scale=4.56) + dist = Cauchy(loc=1.23, scale=4.56, dtype=dtype) repr_str = repr(dist) - assert repr_str == "Cauchy(loc=1.23, scale=4.56)" + assert repr_str == f"Cauchy(loc={dist.loc}, scale={dist.scale}, dtype=np.{dtype.__name__})" recreated_dist = eval(repr_str) assert dist == recreated_dist @@ -80,13 +79,16 @@ def test_repr_method(self): class TestCauchyPDF: """Tests for the pdf method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_pdf_properties(self, loc, scale, x): + def test_pdf_properties(self, loc, scale, x, dtype): """Tests that the PDF is non-negative and has the correct return type and shape.""" - dist = Cauchy(loc=loc, scale=scale) + loc, scale = 0.0, 1.0 + dist = Cauchy(loc=loc, scale=scale, dtype=dtype) pdf_values = dist.pdf(x) assert isinstance(pdf_values, np.ndarray) + assert pdf_values.dtype == dtype assert pdf_values.shape == x.shape assert np.all(pdf_values >= 0) @@ -112,13 +114,15 @@ def test_pdf_integral_is_one(self, loc, scale): class TestLogCauchyPDF: """Tests for the lpdf (log-PDF) method using hypothesis.""" + 
@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_lpdf_return_type_and_shape(self, loc, scale, x): + def test_lpdf_return_type_and_shape(self, loc, scale, x, dtype): """Tests the return type and shape of the lpdf method.""" - dist = Cauchy(loc=loc, scale=scale) + dist = Cauchy(loc=loc, scale=scale, dtype=dtype) lpdf_values = dist.lpdf(x) assert isinstance(lpdf_values, np.ndarray) + assert lpdf_values.dtype == dtype assert lpdf_values.shape == x.shape @given(loc=st_loc, scale=st_scale, x=st.floats(1e-6, 1e6)) @@ -134,15 +138,17 @@ def test_lpdf_against_scipy(self, loc, scale, x): class TestCauchyPPF: """Tests for the ppf (Percent Point Function) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( loc=st_loc, scale=st_scale, p=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True)) ) - def test_ppf_return_type_and_shape(self, loc, scale, p): + def test_ppf_return_type_and_shape(self, loc, scale, p, dtype): """Tests the return type and shape of the ppf method.""" - dist = Cauchy(loc=loc, scale=scale) + dist = Cauchy(loc=loc, scale=scale, dtype=dtype) ppf_values = dist.ppf(p) assert isinstance(ppf_values, np.ndarray) + assert ppf_values.dtype == dtype assert ppf_values.shape == p.shape @given(loc=st_loc, scale=st_scale, p=st.floats(0, 1, exclude_max=True, exclude_min=True)) @@ -162,55 +168,60 @@ def test_ppf_invalid_input(self, p_val): assert np.isnan(dist.ppf(p_val)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestCauchyGradients: """Tests for gradient calculation methods.""" h = 1e-6 @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3))) - def test_dlog_loc_numerical(self, loc, scale, x): + def test_dlog_loc_numerical(self, loc, scale, x, dtype): """Checks the analytical gradient for 'loc' against a numerical 
approximation.""" assume(np.all(x > (loc + self.h))) - dist = Cauchy(loc=loc, scale=scale) - - lpdf_plus_h = Cauchy(loc=loc + self.h, scale=scale).lpdf(x) - lpdf_minus_h = Cauchy(loc=loc - self.h, scale=scale).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Cauchy(loc, scale, dtype=dtype) analytical_grad = dist._dlog_loc(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Cauchy(loc + self.h, scale).lpdf(x) + lpdf_minus_h = Cauchy(loc - self.h, scale).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3))) - def test_dlog_scale_numerical(self, loc, scale, x): + def test_dlog_scale_numerical(self, loc, scale, x, dtype): """Checks the analytical gradient for 'scale' against a numerical approximation.""" assume(np.all(x > (loc + self.h))) - dist = Cauchy(loc=loc, scale=scale) - - lpdf_plus_h = Cauchy(loc=loc, scale=scale + self.h).lpdf(x) - lpdf_minus_h = Cauchy(loc=loc, scale=scale - self.h).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Cauchy(loc, scale, dtype=dtype) analytical_grad = dist._dlog_scale(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Cauchy(loc, scale + self.h).lpdf(x) + lpdf_minus_h = Cauchy(loc, scale - self.h).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) 
@pytest.mark.parametrize( "fixed_params, expected_shape_col, expected_params", [([], 2, ["loc", "scale"]), (["loc"], 1, ["scale"]), (["scale"], 1, ["loc"]), (["loc", "scale"], 0, [])], ) - def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params): + def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params, dtype): """Tests the structure and content of log_gradients with various fixed parameters.""" - dist = Cauchy(loc=1.0, scale=2.0) + dist = Cauchy(loc=1.0, scale=2.0, dtype=dtype) for param in fixed_params: dist.fix_param(param) @@ -218,6 +229,7 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte gradients = dist.log_gradients(x) assert isinstance(gradients, np.ndarray) + assert gradients.dtype == dtype assert gradients.shape == (len(x), expected_shape_col) if "loc" in expected_params: @@ -228,43 +240,44 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte np.testing.assert_allclose(gradients[:, idx], dist._dlog_scale(x)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestCauchyGenerate: """Tests for the generate method.""" - def test_generate_type_and_shape(self): + def test_generate_type_and_shape(self, dtype): """Tests that generated samples have the correct type and shape.""" np.random.seed(42) random.seed(42) - dist = Cauchy(loc=0.0, scale=2.0) + dist = Cauchy(loc=0.0, scale=2.0, dtype=dtype) size = 100 samples = dist.generate(size=size) assert isinstance(samples, np.ndarray) - assert samples.dtype == np.float64 + assert samples.dtype == dtype assert samples.shape == (size,) - def test_generate_zero_size(self): + def test_generate_zero_size(self, dtype): """Tests if the generating 0 number of samples returns an empty array""" - dist = Cauchy(loc=0.0, scale=1.0) + dist = Cauchy(loc=0.0, scale=1.0, dtype=dtype) assert len(dist.generate(size=0)) == 0 @pytest.mark.parametrize("size", [-1, -10]) - def 
test_generate_negative_size(self, size): + def test_generate_negative_size(self, size, dtype): """Tests that generating a negative number of samples raises ValueError.""" - dist = Cauchy(loc=0.0, scale=1.0) + dist = Cauchy(loc=0.0, scale=1.0, dtype=dtype) with pytest.raises(ValueError): dist.generate(size=size) - def test_generate_kolmogorov_smirnov(self): + def test_generate_kolmogorov_smirnov(self, dtype): """Performs a Kolmogorov-Smirnov test to check if samples fit the distribution.""" np.random.seed(456) random.seed(456) loc, scale = 10.0, 2.0 - dist = Cauchy(loc=loc, scale=scale) + dist = Cauchy(loc=loc, scale=scale, dtype=dtype) size = 1000 samples = dist.generate(size=size) @@ -272,30 +285,3 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "cauchy", args=(loc, scale)) lower_bound = 0.05 assert p_value > lower_bound - - -@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) -class TestCauchyDType(DTypeHandlingMixin): - distribution_class = Cauchy - default_params: ClassVar[dict] = {"loc": 0.0, "scale": 1.0} - - def test_init_with_dtype_sets_correct_types(self, dtype): - self.check_init_with_dtype_sets_correct_types(dtype) - - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_correct_dtype(self, size, dtype): - self.check_generate_returns_correct_dtype(size, dtype) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e3))) - def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) - - @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) - def check_ppf_returns_correct_dtype(self, p_data, dtype): - self.check_ppf_returns_correct_dtype(p_data, dtype) - - @pytest.mark.parametrize("method_name", ["_dlog_loc", "_dlog_scale"]) - @given(x_data=arrays(np.float64, 
st.integers(0, 10), elements=st.floats(0, 1e6))) - def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_continuous_distribution.py b/rework_tests/unit/distributions/test_continuous_distribution.py index dd096e0f..876f2fea 100644 --- a/rework_tests/unit/distributions/test_continuous_distribution.py +++ b/rework_tests/unit/distributions/test_continuous_distribution.py @@ -6,7 +6,6 @@ from copy import copy -from typing import ClassVar import numpy as np import pytest @@ -427,62 +426,3 @@ def test_hash_inequality_dtype(self): d2 = DummyDistribution(param1=1.0, param2=2.0, dtype=np.float32) assert d1 != d2 assert hash(d1) != hash(d2) - - -class DTypeHandlingMixin: - """A test mixin to verify correct dtype handling in all subclasses of ContinuousDistribution.""" - - distribution_class: ClassVar[type[ContinuousDistribution] | None] = None - default_params: ClassVar[dict] = {} - - # Tests initialization - # -------------------- - - def check_init_with_dtype_sets_correct_types(self, dtype): - """Tests that the constructor and the Parameter descriptor correctly cast parameter types.""" - - dist = self.distribution_class(**self.default_params, dtype=dtype) - - assert dist.dtype == dtype - - for param_name in self.default_params: - param_value = getattr(dist, param_name) - assert isinstance(param_value, dtype) - - # Tests generate - # -------------------- - - def check_generate_returns_correct_dtype(self, size, dtype): - """Tests the dtype of the array returned by the generate method.""" - dist = self.distribution_class(**self.default_params, dtype=dtype) - - result = dist.generate(size=size) - - assert isinstance(result, np.ndarray) - assert result.dtype == dtype - - # Tests calculations - # -------------------- - - def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - """Helper method with the 
logic for testing methods that take X.""" - dist = self.distribution_class(**self.default_params, dtype=dtype) - method_to_test = getattr(dist, method_name) - result = method_to_test(x_data) - assert isinstance(result, np.ndarray) - assert result.dtype == dtype - - def check_ppf_returns_correct_dtype(self, p_data, dtype): - """Helper method with the logic for testing the ppf method.""" - dist = self.distribution_class(**self.default_params, dtype=dtype) - result = dist.ppf(p_data) - assert isinstance(result, np.ndarray) - assert result.dtype == dtype - - def check_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - """Tests that each partial derivative method (_dlog_*) returns a NumPy array with the correct dtype.""" - - dist = self.distribution_class(**self.default_params, dtype=dtype) - method = getattr(dist, method_name) - - assert method(x_data).dtype == dtype diff --git a/rework_tests/unit/distributions/test_exponential.py b/rework_tests/unit/distributions/test_exponential.py index 41e15161..c1a05a35 100644 --- a/rework_tests/unit/distributions/test_exponential.py +++ b/rework_tests/unit/distributions/test_exponential.py @@ -6,7 +6,6 @@ import random -from typing import ClassVar import numpy as np import pytest @@ -14,7 +13,6 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Exponential -from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import expon, kstest @@ -24,54 +22,55 @@ st_loc = st.floats(min_value=-1e3, max_value=1e3, allow_nan=False, allow_infinity=False) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestExponentialInitialization: """Tests for the __init__ method and basic properties.""" - def test_initialization_successful(self): + def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid 
parameters.""" loc, rate = 0.5, 2.0 - dist = Exponential(loc=loc, rate=rate) - assert isinstance(dist.loc, float) - assert isinstance(dist.rate, float) - assert dist.loc == loc - assert dist.rate == rate + dist = Exponential(loc=loc, rate=rate, dtype=dtype) + assert dist.loc.dtype == dtype + assert dist.rate.dtype == dtype + assert dist.loc == dtype(loc) + assert dist.rate == dtype(rate) - def test_name_property(self): + def test_name_property(self, dtype): """Tests that the name property returns the correct string.""" - dist = Exponential(loc=0.0, rate=1.0) + dist = Exponential(loc=0.0, rate=1.0, dtype=dtype) assert dist.name == "Exponential" - def test_params_property(self): + def test_params_property(self, dtype): """Tests that the params property returns the correct set of parameter names.""" - dist = Exponential(loc=0.0, rate=1.0) + dist = Exponential(loc=0.0, rate=1.0, dtype=dtype) assert dist.params == {"loc", "rate"} - def test_rate_invariant_violation(self): + def test_rate_invariant_violation(self, dtype): """Tests that initializing with a non-positive rate raises a ValueError.""" with pytest.raises(ValueError, match="Rate parameter must be a positive"): - Exponential(loc=0.0, rate=0.0) + Exponential(loc=0.0, rate=0.0, dtype=dtype) with pytest.raises(ValueError, match="Rate parameter must be a positive"): - Exponential(loc=0.0, rate=-1.0) + Exponential(loc=0.0, rate=-1.0, dtype=dtype) - def test_rate_assignment_violation(self): + def test_rate_assignment_violation(self, dtype): """Tests that assigning a non-positive rate after initialization raises a ValueError.""" - dist = Exponential(loc=0.0, rate=1.0) + dist = Exponential(loc=0.0, rate=1.0, dtype=dtype) with pytest.raises(ValueError, match="Rate parameter must be a positive"): dist.rate = 0.0 with pytest.raises(ValueError, match="Rate parameter must be a positive"): dist.rate = -10.0 - def test_repr_method(self): + def test_repr_method(self, dtype): """Tests that the __repr__ method provides a 
reproducible string.""" - dist = Exponential(loc=1.23, rate=4.56) + dist = Exponential(loc=1.23, rate=4.56, dtype=dtype) repr_str = repr(dist) - assert repr_str == "Exponential(loc=1.23, rate=4.56)" + assert repr_str == f"Exponential(loc={dist.loc}, rate={dist.rate}, dtype=np.{dtype.__name__})" recreated_dist = eval(repr_str) assert recreated_dist == dist @@ -80,13 +79,15 @@ def test_repr_method(self): class TestExponentialPDF: """Tests for the pdf method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(loc=st_loc, rate=st_rate, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_pdf_properties(self, loc, rate, x): + def test_pdf_properties(self, loc, rate, x, dtype): """Tests that the PDF is non-negative and has the correct return type and shape.""" - dist = Exponential(loc=loc, rate=rate) + dist = Exponential(loc=loc, rate=rate, dtype=dtype) pdf_values = dist.pdf(x) assert isinstance(pdf_values, np.ndarray) + assert pdf_values.dtype == dtype assert pdf_values.shape == x.shape assert np.all(pdf_values >= 0) @@ -121,13 +122,15 @@ def test_pdf_outside_support(self, loc, rate, x): class TestExponentialLPDF: """Tests for the lpdf (log-PDF) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(loc=st_loc, rate=st_rate, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_lpdf_return_type_and_shape(self, loc, rate, x): + def test_lpdf_return_type_and_shape(self, loc, rate, x, dtype): """Tests the return type and shape of the lpdf method.""" - dist = Exponential(loc=loc, rate=rate) + dist = Exponential(loc=loc, rate=rate, dtype=dtype) lpdf_values = dist.lpdf(x) assert isinstance(lpdf_values, np.ndarray) + assert lpdf_values.dtype == dtype assert lpdf_values.shape == x.shape @given(loc=st_loc, rate=st_rate, x=st.floats(1e-6, 1e6)) @@ -153,18 +156,20 @@ def test_lpdf_outside_support(self, loc, rate, x): class TestExponentialPPF: """Tests for the 
ppf (Percent Point Function) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( loc=st_loc, rate=st_rate, p=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True)) ) - def test_ppf_return_type_and_shape(self, loc, rate, p): + def test_ppf_return_type_and_shape(self, loc, rate, p, dtype): """Tests the return type and shape of the ppf method.""" - dist = Exponential(loc=loc, rate=rate) + dist = Exponential(loc=loc, rate=rate, dtype=dtype) ppf_values = dist.ppf(p) assert isinstance(ppf_values, np.ndarray) + assert ppf_values.dtype == dtype assert ppf_values.shape == p.shape - @given(loc=st_loc, rate=st_rate, p=st.floats(0, 1)) + @given(loc=st_loc, rate=st_rate, p=st.floats(0, 1, exclude_max=True, exclude_min=True)) def test_ppf_against_scipy(self, loc, rate, p): """Compares the custom PPF implementation against scipy's implementation.""" @@ -181,55 +186,60 @@ def test_ppf_invalid_input(self, p_val): assert np.isnan(dist.ppf(p_val)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestExponentialGradients: """Tests for gradient calculation methods.""" h = 1e-6 @given(loc=st_loc, rate=st_rate, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3))) - def test_dlog_loc_numerical(self, loc, rate, x): + def test_dlog_loc_numerical(self, loc, rate, x, dtype): """Checks the analytical gradient for 'loc' against a numerical approximation.""" assume(np.all(x > (loc + self.h))) - dist = Exponential(loc=loc, rate=rate) - - lpdf_plus_h = Exponential(loc=loc + self.h, rate=rate).lpdf(x) - lpdf_minus_h = Exponential(loc=loc - self.h, rate=rate).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Exponential(loc, rate, dtype=dtype) analytical_grad = dist._dlog_loc(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, 
rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Exponential(loc + self.h, rate).lpdf(x) + lpdf_minus_h = Exponential(loc - self.h, rate).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given(loc=st_loc, rate=st_rate, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3))) - def test_dlog_rate_numerical(self, loc, rate, x): + def test_dlog_rate_numerical(self, loc, rate, x, dtype): """Checks the analytical gradient for 'rate' against a numerical approximation.""" assume(np.all(x > (loc + self.h))) - dist = Exponential(loc=loc, rate=rate) - - lpdf_plus_h = Exponential(loc=loc, rate=rate + self.h).lpdf(x) - lpdf_minus_h = Exponential(loc=loc, rate=rate - self.h).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Exponential(loc, rate, dtype=dtype) analytical_grad = dist._dlog_rate(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Exponential(loc, rate + self.h).lpdf(x) + lpdf_minus_h = Exponential(loc, rate - self.h).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) @pytest.mark.parametrize( "fixed_params, expected_shape_col, expected_params", [([], 2, ["loc", "rate"]), (["loc"], 1, ["rate"]), (["rate"], 1, ["loc"]), (["loc", "rate"], 0, [])], ) - def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params): + def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params, dtype): """Tests the structure and content of log_gradients with various fixed parameters.""" - dist = Exponential(loc=1.0, rate=2.0) + dist = Exponential(loc=1.0, rate=2.0, 
dtype=dtype) for param in fixed_params: dist.fix_param(param) @@ -237,6 +247,7 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte gradients = dist.log_gradients(x) assert isinstance(gradients, np.ndarray) + assert gradients.dtype == dtype assert gradients.shape == (len(x), expected_shape_col) if "loc" in expected_params: @@ -247,43 +258,44 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte np.testing.assert_allclose(gradients[:, idx], dist._dlog_rate(x)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestExponentialGenerate: """Tests for the generate method.""" - def test_generate_type_and_shape(self): + def test_generate_type_and_shape(self, dtype): """Tests that generated samples have the correct type and shape.""" np.random.seed(42) random.seed(42) - dist = Exponential(loc=0.0, rate=2.0) + dist = Exponential(loc=0.0, rate=2.0, dtype=dtype) size = 100 samples = dist.generate(size=size) assert isinstance(samples, np.ndarray) - assert samples.dtype == np.float64 + assert samples.dtype == dtype assert samples.shape == (size,) - def test_generate_zero_size(self): + def test_generate_zero_size(self, dtype): """Tests if the generating 0 number of samples returns an empty array""" - dist = Exponential(loc=0.0, rate=1.0) + dist = Exponential(loc=0.0, rate=1.0, dtype=dtype) assert len(dist.generate(size=0)) == 0 @pytest.mark.parametrize("size", [-1, -10]) - def test_generate_negative_size(self, size): + def test_generate_negative_size(self, size, dtype): """Tests that generating a negative number of samples raises ValueError.""" - dist = Exponential(loc=0.0, rate=1.0) + dist = Exponential(loc=0.0, rate=1.0, dtype=dtype) with pytest.raises(ValueError): dist.generate(size=size) - def test_generate_statistical_properties(self): + def test_generate_statistical_properties(self, dtype): """Tests if the generated samples have correct statistical properties (mean, variance).""" np.random.seed(123) 
random.seed(123) loc, rate = 5.0, 0.5 - dist = Exponential(loc=loc, rate=rate) + dist = Exponential(loc=loc, rate=rate, dtype=dtype) size = 20000 samples = dist.generate(size=size) @@ -291,16 +303,16 @@ def test_generate_statistical_properties(self): theoretical_mean = loc + 1 / rate theoretical_var = (1 / rate) ** 2 - assert np.mean(samples) == pytest.approx(theoretical_mean, rel=0.1) - assert np.var(samples) == pytest.approx(theoretical_var, rel=0.1) + assert np.mean(samples, dtype=np.float64) == pytest.approx(theoretical_mean, rel=0.1) + assert np.var(samples, dtype=np.float64) == pytest.approx(theoretical_var, rel=0.1) - def test_generate_kolmogorov_smirnov(self): + def test_generate_kolmogorov_smirnov(self, dtype): """Performs a Kolmogorov-Smirnov test to check if samples fit the distribution.""" np.random.seed(456) random.seed(456) loc, rate = 10.0, 2.0 - dist = Exponential(loc=loc, rate=rate) + dist = Exponential(loc=loc, rate=rate, dtype=dtype) size = 1000 samples = dist.generate(size=size) @@ -308,30 +320,3 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "expon", args=(loc, 1 / rate)) lower_bound = 0.05 assert p_value > lower_bound - - -@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) -class TestExponentialDType(DTypeHandlingMixin): - distribution_class = Exponential - default_params: ClassVar[dict] = {"loc": 0.0, "rate": 1.0} - - def test_init_with_dtype_sets_correct_types(self, dtype): - self.check_init_with_dtype_sets_correct_types(dtype) - - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_correct_dtype(self, size, dtype): - self.check_generate_returns_correct_dtype(size, dtype) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e3))) - def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - self.check_methods_taking_x_return_correct_dtype(method_name, x_data, 
dtype) - - @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) - def check_ppf_returns_correct_dtype(self, p_data, dtype): - self.check_ppf_returns_correct_dtype(p_data, dtype) - - @pytest.mark.parametrize("method_name", ["_dlog_loc", "_dlog_rate"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e6))) - def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_normal.py b/rework_tests/unit/distributions/test_normal.py index c22864d8..0ad39301 100644 --- a/rework_tests/unit/distributions/test_normal.py +++ b/rework_tests/unit/distributions/test_normal.py @@ -5,7 +5,6 @@ __license__ = "SPDX-License-Identifier: MIT" import random -from typing import ClassVar import numpy as np import pytest @@ -13,7 +12,6 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Normal -from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import kstest, norm @@ -24,45 +22,55 @@ st_scale = st.floats(min_value=0.01, max_value=1e3, allow_nan=False, allow_infinity=False) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestNormalInitialization: """Tests for the __init__ method and basic properties.""" - def test_initialization_successful(self): + def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid parameters.""" loc, scale = 10.0, 2.5 - dist = Normal(loc=loc, scale=scale) - assert isinstance(dist.loc, float) - assert isinstance(dist.scale, float) - assert dist.loc == loc - assert dist.scale == scale + dist = Normal(loc=loc, scale=scale, dtype=dtype) + assert dist.loc.dtype == dtype + assert dist.scale.dtype == dtype + assert dist.loc == 
dtype(loc) + assert dist.scale == dtype(scale) - def test_name_property(self): + def test_name_property(self, dtype): """Tests that the name property returns the correct string.""" - dist = Normal(loc=0.0, scale=1.0) + dist = Normal(loc=0.0, scale=1.0, dtype=dtype) assert dist.name == "Normal" - def test_params_property(self): + def test_params_property(self, dtype): """Tests that the params property returns the correct set of parameter names.""" - dist = Normal(loc=0.0, scale=1.0) + dist = Normal(loc=0.0, scale=1.0, dtype=dtype) assert dist.params == {"loc", "scale"} - def test_scale_invariant_violation(self): + def test_scale_invariant_violation(self, dtype): """Tests that initializing with a non-positive scale raises a ValueError.""" with pytest.raises(ValueError, match="Scale parameter must be positive"): - Normal(loc=0.0, scale=0.0) + Normal(loc=0.0, scale=0.0, dtype=dtype) + with pytest.raises(ValueError, match="Scale parameter must be positive"): + Normal(loc=0.0, scale=-1.0, dtype=dtype) + + def test_scale_assignment_violation(self, dtype): + """Tests that assigning a non-positive rate after initialization raises a ValueError.""" + + dist = Normal(loc=0.0, scale=1.0, dtype=dtype) + with pytest.raises(ValueError, match="Scale parameter must be positive"): + dist.scale = 0.0 with pytest.raises(ValueError, match="Scale parameter must be positive"): - Normal(loc=0.0, scale=-1.0) + dist.scale = -10.0 - def test_repr_method(self): + def test_repr_method(self, dtype): """Tests that the __repr__ method provides a reproducible string.""" - dist = Normal(loc=1.23, scale=4.56) + dist = Normal(loc=1.23, scale=4.56, dtype=dtype) repr_str = repr(dist) - assert repr_str == "Normal(loc=1.23, scale=4.56)" + assert repr_str == f"Normal(loc={dist.loc}, scale={dist.scale}, dtype=np.{dtype.__name__})" recreated_dist = eval(repr_str) assert dist == recreated_dist @@ -71,13 +79,15 @@ def test_repr_method(self): class TestNormalPDF: """Tests for the pdf method using hypothesis.""" 
+ @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_pdf_properties(self, loc, scale, x): + def test_pdf_properties(self, loc, scale, x, dtype): """Tests that the PDF is non-negative and has the correct return type and shape.""" - dist = Normal(loc=loc, scale=scale) + dist = Normal(loc=loc, scale=scale, dtype=dtype) pdf_values = dist.pdf(x) assert isinstance(pdf_values, np.ndarray) + assert pdf_values.dtype == dtype assert pdf_values.shape == x.shape assert np.all(pdf_values >= 0) @@ -102,13 +112,15 @@ def test_pdf_integral_is_one(self, loc, scale): class TestNormalLPDF: """Tests for the lpdf (log-PDF) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_lpdf_return_type_and_shape(self, loc, scale, x): + def test_lpdf_return_type_and_shape(self, loc, scale, x, dtype): """Tests the return type and shape of the lpdf method.""" - dist = Normal(loc=loc, scale=scale) + dist = Normal(loc=loc, scale=scale, dtype=dtype) lpdf_values = dist.lpdf(x) assert isinstance(lpdf_values, np.ndarray) + assert lpdf_values.dtype == dtype assert lpdf_values.shape == x.shape @given(loc=st_loc, scale=st_scale, x=st.floats(-1e6, 1e6)) @@ -124,13 +136,15 @@ def test_lpdf_against_scipy(self, loc, scale, x): class TestNormalPPF: """Tests for the ppf (Percent Point Function) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(loc=st_loc, scale=st_scale, p=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1))) - def test_ppf_return_type_and_shape(self, loc, scale, p): + def test_ppf_return_type_and_shape(self, loc, scale, p, dtype): """Tests the return type and shape of the ppf method.""" - dist = Normal(loc=loc, scale=scale) + dist = Normal(loc=loc, scale=scale, dtype=dtype) ppf_values = 
dist.ppf(p) assert isinstance(ppf_values, np.ndarray) + assert ppf_values.dtype == dtype assert ppf_values.shape == p.shape @given(loc=st_loc, scale=st_scale, p=st.floats(1e-6, 1.0 - 1e-6)) @@ -143,41 +157,56 @@ def test_ppf_against_scipy(self, loc, scale, p): np.testing.assert_allclose(custom_ppf, scipy_ppf, atol=1e-9) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestNormalGradients: """Tests for gradient calculation methods.""" h = 1e-6 @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(-1e3, 1e3))) - def test_dlog_loc_numerical(self, loc, scale, x): + def test_dlog_loc_numerical(self, loc, scale, x, dtype): """Checks the analytical gradient for 'loc' against a numerical approximation.""" - dist = Normal(loc=loc, scale=scale) - lpdf_plus_h = Normal(loc=loc + self.h, scale=scale).lpdf(x) - lpdf_minus_h = Normal(loc=loc - self.h, scale=scale).lpdf(x) - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Normal(loc, scale, dtype=dtype) analytical_grad = dist._dlog_loc(x) - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype + assert analytical_grad.shape == x.shape + + if dtype == np.float64: + lpdf_plus_h = Normal(loc + self.h, scale).lpdf(x) + lpdf_minus_h = Normal(loc - self.h, scale).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given(loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(-1e3, 1e3))) - def test_dlog_scale_numerical(self, loc, scale, x): + def test_dlog_scale_numerical(self, loc, scale, x, dtype): """Checks the analytical gradient for 'scale' against a numerical approximation.""" - dist = Normal(loc=loc, scale=scale) - lpdf_plus_h = Normal(loc=loc, scale=scale + self.h).lpdf(x) - lpdf_minus_h = Normal(loc=loc, 
scale=scale - self.h).lpdf(x) - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Normal(loc, scale, dtype=dtype) analytical_grad = dist._dlog_scale(x) - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) + + assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype + assert analytical_grad.shape == x.shape + + if dtype == np.float64: + lpdf_plus_h = Normal(loc, scale + self.h).lpdf(x) + lpdf_minus_h = Normal(loc, scale - self.h).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) @pytest.mark.parametrize( "fixed_params, expected_cols, expected_params", [([], 2, ["loc", "scale"]), (["loc"], 1, ["scale"]), (["scale"], 1, ["loc"]), (["loc", "scale"], 0, [])], ) - def test_log_gradients_structure(self, fixed_params, expected_cols, expected_params): + def test_log_gradients_structure(self, fixed_params, expected_cols, expected_params, dtype): """Tests the structure and content of log_gradients with various fixed parameters.""" - dist = Normal(loc=1.0, scale=2.0) + dist = Normal(loc=1.0, scale=2.0, dtype=dtype) for param in fixed_params: dist.fix_param(param) @@ -194,68 +223,57 @@ def test_log_gradients_structure(self, fixed_params, expected_cols, expected_par np.testing.assert_allclose(gradients[:, idx], dist._dlog_scale(x)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestNormalGenerate: """Tests for the generate method.""" - def test_generate_type_and_shape(self): + def test_generate_type_and_shape(self, dtype): """Tests that generated samples have the correct type and shape.""" - dist = Normal(loc=0.0, scale=1.0) + dist = Normal(loc=0.0, scale=1.0, dtype=dtype) samples = dist.generate(size=100) assert isinstance(samples, np.ndarray) - assert samples.dtype == np.float64 + assert samples.dtype == dtype assert samples.shape == (100,) - def 
test_generate_statistical_properties(self): + def test_generate_zero_size(self, dtype): + """Tests if the generating 0 number of samples returns an empty array""" + + dist = Normal(loc=0.0, scale=1.0, dtype=dtype) + assert len(dist.generate(size=0)) == 0 + + @pytest.mark.parametrize("size", [-1, -10]) + def test_generate_negative_size(self, size, dtype): + """Tests that generating a negative number of samples raises ValueError.""" + + dist = Normal(loc=0.0, scale=1.0, dtype=dtype) + + with pytest.raises(ValueError): + dist.generate(size=size) + + def test_generate_statistical_properties(self, dtype): """Tests if the generated samples have correct statistical properties (mean, variance).""" np.random.seed(123) random.seed(123) loc, scale = 15.0, 3.0 - dist = Normal(loc=loc, scale=scale) + dist = Normal(loc=loc, scale=scale, dtype=dtype) size = 50000 samples = dist.generate(size=size) - assert np.mean(samples) == pytest.approx(loc, rel=0.05) - assert np.var(samples) == pytest.approx(scale**2, rel=0.05) + assert np.mean(samples, dtype=np.float64) == pytest.approx(loc, rel=0.05) + assert np.var(samples, dtype=np.float64) == pytest.approx(scale**2, rel=0.05) - def test_generate_kolmogorov_smirnov(self): + def test_generate_kolmogorov_smirnov(self, dtype): """Performs a Kolmogorov-Smirnov test to check if samples fit the distribution.""" np.random.seed(456) random.seed(456) loc, scale = -10.0, 5.0 - dist = Normal(loc=loc, scale=scale) + dist = Normal(loc=loc, scale=scale, dtype=dtype) size = 1000 - expected_p_value = 0.05 samples = dist.generate(size=size) ks_statistic, p_value = kstest(samples, "norm", args=(loc, scale)) + expected_p_value = 0.05 assert p_value > expected_p_value - - -@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) -class TestNormalDType(DTypeHandlingMixin): - distribution_class = Normal - default_params: ClassVar[dict] = {"loc": 0.0, "scale": 1.0} - - def test_init_with_dtype_sets_correct_types(self, dtype): - 
self.check_init_with_dtype_sets_correct_types(dtype) - - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_correct_dtype(self, size, dtype): - self.check_generate_returns_correct_dtype(size, dtype) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e3, 1e3))) - def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) - - @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) - def check_ppf_returns_correct_dtype(self, p_data, dtype): - self.check_ppf_returns_correct_dtype(p_data, dtype) - - @pytest.mark.parametrize("method_name", ["_dlog_loc", "_dlog_scale"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_pareto.py b/rework_tests/unit/distributions/test_pareto.py index 97886f16..176c181c 100644 --- a/rework_tests/unit/distributions/test_pareto.py +++ b/rework_tests/unit/distributions/test_pareto.py @@ -7,7 +7,6 @@ import random from pathlib import Path -from typing import ClassVar import numpy as np import pandas as pd @@ -16,7 +15,6 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions.pareto import Pareto -from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import kstest, pareto @@ -42,71 +40,72 @@ def load_r_test_cases(): return cases +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestParetoInitialization: """Tests for the __init__ method and basic properties.""" - def 
test_initialization_successful(self): + def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid parameters.""" shape, scale = 0.5, 2.0 - dist = Pareto(shape=shape, scale=scale) - assert isinstance(dist.shape, float) - assert isinstance(dist.scale, float) - assert dist.shape == shape - assert dist.scale == scale + dist = Pareto(shape=shape, scale=scale, dtype=dtype) + assert dist.shape.dtype == dtype + assert dist.scale.dtype == dtype + assert dist.shape == dtype(shape) + assert dist.scale == dtype(scale) - def test_name_property(self): + def test_name_property(self, dtype): """Tests that the name property returns the correct string.""" - dist = Pareto(shape=1.0, scale=2.0) + dist = Pareto(shape=1.0, scale=2.0, dtype=dtype) assert dist.name == "Pareto" - def test_params_property(self): + def test_params_property(self, dtype): """Tests that the params property returns the correct set of parameter names.""" - dist = Pareto(shape=1.0, scale=1.0) + dist = Pareto(shape=1.0, scale=1.0, dtype=dtype) assert dist.params == {"shape", "scale"} - def test_shape_invariant_violation(self): + def test_shape_invariant_violation(self, dtype): """Tests that initializing with a non-positive shape raises a ValueError.""" with pytest.raises(ValueError, match="Shape parameter must be a positive"): - Pareto(shape=0.0, scale=1.0) + Pareto(shape=0.0, scale=1.0, dtype=dtype) with pytest.raises(ValueError, match="Shape parameter must be a positive"): - Pareto(shape=-1.0, scale=1.0) + Pareto(shape=-1.0, scale=1.0, dtype=dtype) - def test_scale_invariant_violation(self): + def test_scale_invariant_violation(self, dtype): """Tests that initializing with a non-positive scale raises a ValueError.""" with pytest.raises(ValueError, match="Scale parameter must be a positive"): - Pareto(shape=1.0, scale=0.0) + Pareto(shape=1.0, scale=0.0, dtype=dtype) with pytest.raises(ValueError, match="Scale parameter must be a positive"): - Pareto(shape=1.0, 
scale=-1.0) + Pareto(shape=1.0, scale=-1.0, dtype=dtype) - def test_shape_assignment_violation(self): + def test_shape_assignment_violation(self, dtype): """Tests that assigning a non-positive shape after initialization raises a ValueError.""" - dist = Pareto(shape=1.0, scale=1.0) + dist = Pareto(shape=1.0, scale=1.0, dtype=dtype) with pytest.raises(ValueError, match="Shape parameter must be a positive"): dist.shape = 0.0 with pytest.raises(ValueError, match="Shape parameter must be a positive"): dist.shape = -10.0 - def test_scale_assignment_violation(self): + def test_scale_assignment_violation(self, dtype): """Tests that assigning a non-positive scale after initialization raises a ValueError.""" - dist = Pareto(shape=1.0, scale=1.0) + dist = Pareto(shape=1.0, scale=1.0, dtype=dtype) with pytest.raises(ValueError, match="Scale parameter must be a positive"): dist.scale = 0.0 with pytest.raises(ValueError, match="Scale parameter must be a positive"): dist.scale = -10.0 - def test_repr_method(self): + def test_repr_method(self, dtype): """Tests that the __repr__ method provides a reproducible string.""" - dist = Pareto(shape=1.23, scale=4.56) + dist = Pareto(shape=1.23, scale=4.56, dtype=dtype) repr_str = repr(dist) - assert repr_str == "Pareto(shape=1.23, scale=4.56)" + assert repr_str == f"Pareto(shape={dist.shape}, scale={dist.scale}, dtype=np.{dtype.__name__})" recreated_dist = eval(repr_str) assert dist == recreated_dist @@ -115,13 +114,15 @@ def test_repr_method(self): class TestParetoPDF: """Tests for the pdf method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e2, 1e2))) - def test_pdf_properties(self, x): + def test_pdf_properties(self, x, dtype): """Tests that the PDF is non-negative and has the correct return type and shape.""" - dist = Pareto(shape=1.0, scale=2.0) + dist = Pareto(shape=1.0, scale=2.0, dtype=dtype) pdf_values = dist.pdf(x) assert 
isinstance(pdf_values, np.ndarray) + assert pdf_values.dtype == dtype assert pdf_values.shape == x.shape assert np.all(pdf_values >= 0) @@ -168,13 +169,15 @@ def test_pdf_outside_support(self, shape, scale, x): class TestParetoLPDF: """Tests for the lpdf (log-PDF) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given(shape=st_shape, scale=st_scale, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_lpdf_return_type_and_shape(self, shape, scale, x): + def test_lpdf_return_type_and_shape(self, shape, scale, x, dtype): """Tests the return type and shape of the lpdf method.""" - dist = Pareto(shape=shape, scale=scale) + dist = Pareto(shape=shape, scale=scale, dtype=dtype) lpdf_values = dist.lpdf(x) assert isinstance(lpdf_values, np.ndarray) + assert lpdf_values.dtype == dtype assert lpdf_values.shape == x.shape @given(shape=st_shape, scale=st_scale, x=st.floats(1e-3, 1e3, allow_infinity=False, allow_nan=False)) @@ -198,17 +201,19 @@ def test_lpdf_outside_support(self, shape, scale, x): class TestParetoPPF: """Tests for the ppf (Percent Point Function) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( shape=st_shape, scale=st_scale, p=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True)), ) - def test_ppf_return_type_and_shape(self, shape, scale, p): + def test_ppf_return_type_and_shape(self, shape, scale, p, dtype): """Tests the return type and shape of the ppf method.""" - dist = Pareto(shape=shape, scale=scale) + dist = Pareto(shape=shape, scale=scale, dtype=dtype) ppf_values = dist.ppf(p) assert isinstance(ppf_values, np.ndarray) + assert ppf_values.dtype == dtype assert ppf_values.shape == p.shape @given(shape=st_shape, scale=st_scale, p=st.floats(0, 1)) @@ -228,6 +233,7 @@ def test_ppf_invalid_input(self, p_val): assert np.isnan(dist.ppf(p_val)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestParetoGradients: """Tests 
for gradient calculation methods.""" @@ -235,41 +241,45 @@ class TestParetoGradients: @settings(suppress_health_check=[HealthCheck.filter_too_much]) @given(shape=st_shape, scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3))) - def test_dlog_shape_numerical(self, shape, scale, x): + def test_dlog_shape_numerical(self, shape, scale, x, dtype): """Checks the analytical gradient for 'shape' against a numerical approximation.""" assume(np.all(x > scale)) - dist = Pareto(shape=shape, scale=scale) - - lpdf_plus_h = Pareto(shape=shape + self.h, scale=scale).lpdf(x) - lpdf_minus_h = Pareto(shape=shape - self.h, scale=scale).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Pareto(shape=shape, scale=scale, dtype=dtype) analytical_grad = dist._dlog_shape(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Pareto(shape=shape + self.h, scale=scale).lpdf(x) + lpdf_minus_h = Pareto(shape=shape - self.h, scale=scale).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @settings(suppress_health_check=[HealthCheck.filter_too_much]) @given(shape=st_shape, scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3))) - def test_dlog_scale_numerical(self, shape, scale, x): + def test_dlog_scale_numerical(self, shape, scale, x, dtype): """Checks the analytical gradient for 'scale' against a numerical approximation.""" assume(np.all(x > scale + self.h)) - dist = Pareto(shape=shape, scale=scale) - - lpdf_plus_h = Pareto(shape=shape, scale=scale + self.h).lpdf(x) - lpdf_minus_h = Pareto(shape=shape, scale=scale - self.h).lpdf(x) - - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * 
self.h) + dist = Pareto(shape=shape, scale=scale, dtype=dtype) analytical_grad = dist._dlog_scale(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Pareto(shape=shape, scale=scale + self.h).lpdf(x) + lpdf_minus_h = Pareto(shape=shape, scale=scale - self.h).lpdf(x) + + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) @pytest.mark.parametrize( "fixed_params, expected_shape_col, expected_params", @@ -280,10 +290,10 @@ def test_dlog_scale_numerical(self, shape, scale, x): (["shape", "scale"], 0, []), ], ) - def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params): + def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params, dtype): """Tests the structure and content of log_gradients with various fixed parameters.""" - dist = Pareto(shape=1.0, scale=2.0) + dist = Pareto(shape=1.0, scale=2.0, dtype=dtype) for param in fixed_params: dist.fix_param(param) @@ -291,6 +301,7 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte gradients = dist.log_gradients(x) assert isinstance(gradients, np.ndarray) + assert gradients.dtype == dtype assert gradients.shape == (len(x), expected_shape_col) if "shape" in expected_params: @@ -301,43 +312,44 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte np.testing.assert_allclose(gradients[:, idx], dist._dlog_scale(x)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestParetoGenerate: """Tests for the generate method.""" - def test_generate_type_and_shape(self): + def test_generate_type_and_shape(self, dtype): """Tests that generated samples have the correct type and shape.""" np.random.seed(42) 
random.seed(42) - dist = Pareto(shape=1.0, scale=2.0) + dist = Pareto(shape=1.0, scale=2.0, dtype=dtype) size = 100 samples = dist.generate(size=size) assert isinstance(samples, np.ndarray) - assert samples.dtype == np.float64 + assert samples.dtype == dtype assert samples.shape == (size,) - def test_generate_zero_size(self): + def test_generate_zero_size(self, dtype): """Tests if the generating 0 number of samples returns an empty array""" - dist = Pareto(shape=1.0, scale=2.0) + dist = Pareto(shape=1.0, scale=2.0, dtype=dtype) assert len(dist.generate(size=0)) == 0 @pytest.mark.parametrize("size", [-1, -10]) - def test_generate_negative_size(self, size): + def test_generate_negative_size(self, size, dtype): """Tests that generating a negative number of samples raises ValueError.""" - dist = Pareto(shape=1.0, scale=2.0) + dist = Pareto(shape=1.0, scale=2.0, dtype=dtype) with pytest.raises(ValueError): dist.generate(size=size) - def test_generate_statistical_properties(self): + def test_generate_statistical_properties(self, dtype): """Tests if the generated samples have correct statistical properties (mean, variance).""" np.random.seed(123) random.seed(123) shape, scale = 5.0, 0.5 - dist = Pareto(shape=shape, scale=scale) + dist = Pareto(shape=shape, scale=scale, dtype=dtype) size = 20000 samples = dist.generate(size=size) @@ -345,47 +357,20 @@ def test_generate_statistical_properties(self): theoretical_mean = (shape * scale) / (shape - 1) theoretical_var = ((scale**2) * shape) / ((shape - 1) ** 2 * (shape - 2)) - assert np.mean(samples) == pytest.approx(theoretical_mean, rel=0.1) - assert np.var(samples) == pytest.approx(theoretical_var, rel=0.1) + assert np.mean(samples, dtype=np.float64) == pytest.approx(theoretical_mean, rel=0.1) + assert np.var(samples, dtype=np.float64) == pytest.approx(theoretical_var, rel=0.1) - def test_generate_kolmogorov_smirnov(self): + def test_generate_kolmogorov_smirnov(self, dtype): """Performs a Kolmogorov-Smirnov test to check if 
samples fit the distribution.""" np.random.seed(456) random.seed(456) - shape, scale, loc = 10.0, 2.0, 0.0 - dist = Pareto(shape=shape, scale=scale) + shape, scale = 10.0, 2.0 + dist = Pareto(shape=shape, scale=scale, dtype=dtype) size = 1000 samples = dist.generate(size=size) - ks_statistic, p_value = kstest(samples, "pareto", args=(shape, loc, scale)) + ks_statistic, p_value = kstest(samples, "pareto", args=(shape, 0, scale)) lower_bound = 0.05 assert p_value > lower_bound - - -@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) -class TestParetoDType(DTypeHandlingMixin): - distribution_class = Pareto - default_params: ClassVar[dict] = {"shape": 1.0, "scale": 2.0} - - def test_init_with_dtype_sets_correct_types(self, dtype): - self.check_init_with_dtype_sets_correct_types(dtype) - - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_correct_dtype(self, size, dtype): - self.check_generate_returns_correct_dtype(size, dtype) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(2, 1e3))) - def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) - - @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) - def check_ppf_returns_correct_dtype(self, p_data, dtype): - self.check_ppf_returns_correct_dtype(p_data, dtype) - - @pytest.mark.parametrize("method_name", ["_dlog_shape", "_dlog_scale"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(2, 1e6))) - def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_uniform.py b/rework_tests/unit/distributions/test_uniform.py index 0aa61ef2..21a24064 100644 --- 
a/rework_tests/unit/distributions/test_uniform.py +++ b/rework_tests/unit/distributions/test_uniform.py @@ -5,7 +5,6 @@ __license__ = "SPDX-License-Identifier: MIT" import random -from typing import ClassVar import numpy as np import pytest @@ -13,7 +12,6 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Uniform -from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.stats import kstest, uniform @@ -30,50 +28,54 @@ def st_valid_border(draw): return left_border, right_border +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestUniformInitialization: """Tests for the __init__ method and basic properties.""" - def test_initialization_successful(self): + def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid parameters.""" l_border, r_border = 0.5, 2.0 - dist = Uniform(left_border=l_border, right_border=r_border) - assert isinstance(dist.left_border, float) - assert isinstance(dist.right_border, float) - assert dist.left_border == l_border - assert dist.right_border == r_border + dist = Uniform(left_border=l_border, right_border=r_border, dtype=dtype) + assert dist.left_border.dtype == dtype + assert dist.right_border.dtype == dtype + assert dist.left_border == dtype(l_border) + assert dist.right_border == dtype(r_border) - def test_name_property(self): + def test_name_property(self, dtype): """Tests that the name property returns the correct string.""" - dist = Uniform(left_border=0.0, right_border=1.0) + dist = Uniform(left_border=0.0, right_border=1.0, dtype=dtype) assert dist.name == "Uniform" - def test_params_property(self): + def test_params_property(self, dtype): """Tests that the params property returns the correct set of parameter names.""" - dist = Uniform(left_border=0.0, right_border=1.0) + dist = Uniform(left_border=0.0, 
right_border=1.0, dtype=dtype) assert dist.params == {"left_border", "right_border"} - def test_invariant_violation(self): + def test_invariant_violation(self, dtype): """Tests that initializing with a infinite borders or left border bigger right border raises a ValueError.""" with pytest.raises(ValueError, match="right_border parameter must be strictly greater than left_border"): - Uniform(0.0, -1.0) + Uniform(0.0, -1.0, dtype=dtype) with pytest.raises(ValueError, match="right_border parameter must be strictly greater than left_border"): - Uniform(0.0, -2.0) + Uniform(0.0, -2.0, dtype=dtype) with pytest.raises(ValueError, match="right_border parameter must be strictly greater than left_border"): - Uniform(0.0, 0.0) + Uniform(0.0, 0.0, dtype=dtype) with pytest.raises(ValueError, match="Both borders should be finite values"): - Uniform(-np.inf, 0.0) + Uniform(-np.inf, 0.0, dtype=dtype) with pytest.raises(ValueError, match="Both borders should be finite values"): - Uniform(0.0, np.inf) + Uniform(0.0, np.inf, dtype=dtype) - def test_repr_method(self): + def test_repr_method(self, dtype): """Tests that the __repr__ method provides a reproducible string.""" - dist = Uniform(left_border=1.23, right_border=4.56) + dist = Uniform(left_border=1.23, right_border=4.56, dtype=dtype) repr_str = repr(dist) - assert repr_str == "Uniform(left_border=1.23, right_border=4.56)" + assert ( + repr_str == f"Uniform(left_border={dist.left_border}, right_border={dist.right_border}, " + f"dtype=np.{dtype.__name__})" + ) recreated_dist = eval(repr_str) assert dist == recreated_dist @@ -82,17 +84,19 @@ def test_repr_method(self): class TestUniformPDF: """Tests for the pdf method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( borders=st_valid_border(), x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6)), ) - def test_pdf_properties(self, borders, x): + def test_pdf_properties(self, borders, x, dtype): """Tests that the PDF is non-negative 
and has the correct return type and shape.""" left_border, right_border = borders - dist = Uniform(left_border=left_border, right_border=right_border) + dist = Uniform(left_border=left_border, right_border=right_border, dtype=dtype) pdf_values = dist.pdf(x) assert isinstance(pdf_values, np.ndarray) + assert pdf_values.dtype == dtype assert pdf_values.shape == x.shape assert np.all(pdf_values >= 0) @@ -122,19 +126,56 @@ def test_pdf_outside_support(self, borders, x): assert dist.pdf(x_val) == 0.0 +class TestUniformLPDF: + """Tests for the lpdf (log-PDF) method using hypothesis.""" + + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) + @given( + borders=st_valid_border(), + x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6)), + ) + def test_lpdf_return_type_and_shape(self, borders, x, dtype): + """Tests the return type and shape of the lpdf method.""" + left_border, right_border = borders + dist = Uniform(left_border=left_border, right_border=right_border, dtype=dtype) + lpdf_values = dist.lpdf(x) + assert isinstance(lpdf_values, np.ndarray) + assert lpdf_values.dtype == dtype + assert lpdf_values.shape == x.shape + + @given(borders=st_valid_border(), x=st.floats(1e-6, 1e6)) + def test_lpdf_against_scipy(self, borders, x): + """Compares the custom LPDF implementation against scipy's implementation.""" + left_border, right_border = borders + dist = Uniform(left_border=left_border, right_border=right_border) + custom_lpdf = dist.lpdf(x) + scipy_lpdf = uniform.logpdf(x, loc=left_border, scale=right_border - left_border) + np.testing.assert_allclose(custom_lpdf, scipy_lpdf, atol=1e-9) + + @given(borders=st_valid_border(), x=st.floats(min_value=1e-6)) + def test_lpdf_outside_support(self, borders, x): + """Tests that the LPDF is -inf for values outside the support.""" + left_border, right_border = borders + dist = Uniform(left_border=left_border, right_border=right_border) + assert dist.lpdf(left_border - x) == -np.inf + assert dist.lpdf(right_border 
+ x) == -np.inf + + class TestUniformPPF: """Tests for the ppf (Percent Point Function) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( borders=st_valid_border(), p=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True)), ) - def test_ppf_return_type_and_shape(self, borders, p): + def test_ppf_return_type_and_shape(self, borders, p, dtype): """Tests the return type and shape of the ppf method.""" left_border, right_border = borders - dist = Uniform(left_border=left_border, right_border=right_border) + dist = Uniform(left_border=left_border, right_border=right_border, dtype=dtype) ppf_values = dist.ppf(p) assert isinstance(ppf_values, np.ndarray) + assert ppf_values.dtype == dtype assert ppf_values.shape == p.shape @given(borders=st_valid_border(), p=st.floats(0, 1)) @@ -176,56 +217,49 @@ def st_valid_grad_input(draw): return (left_border, right_border), x_values +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestUniformGradients: """Tests for gradient calculation methods.""" h = 1e-6 @given(input_data=st_valid_grad_input()) - def test_dlog_left_border_numerical(self, input_data): + def test_dlog_left_border_numerical(self, input_data, dtype): """Checks the analytical gradient for 'left_border' against a numerical approximation.""" borders, x = input_data left_border, right_border = borders - dist = Uniform(left_border=left_border, right_border=right_border) - - lpdf_plus_h = Uniform(left_border=left_border + self.h, right_border=right_border).lpdf(x) - lpdf_minus_h = Uniform(left_border=left_border - self.h, right_border=right_border).lpdf(x) - - finite_mask = np.isfinite(lpdf_plus_h) & np.isfinite(lpdf_minus_h) - - if np.any(finite_mask): - numerical_grad = np.zeros_like(x) - numerical_grad[finite_mask] = (lpdf_plus_h[finite_mask] - lpdf_minus_h[finite_mask]) / (2 * self.h) - - analytical_grad = dist._dlog_left_border(x) + dist = Uniform(left_border=left_border, right_border=right_border, 
dtype=dtype) + analytical_grad = dist._dlog_left_border(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Uniform(left_border=left_border + self.h, right_border=right_border).lpdf(x) + lpdf_minus_h = Uniform(left_border=left_border - self.h, right_border=right_border).lpdf(x) + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given(input_data=st_valid_grad_input()) - def test_dlog_right_border_numerical(self, input_data): + def test_dlog_right_border_numerical(self, input_data, dtype): """Checks the analytical gradient for 'right_border' against a numerical approximation.""" borders, x = input_data left_border, right_border = borders - dist = Uniform(left_border=left_border, right_border=right_border) - - lpdf_plus_h = Uniform(left_border=left_border, right_border=right_border + self.h).lpdf(x) - lpdf_minus_h = Uniform(left_border=left_border, right_border=right_border - self.h).lpdf(x) - - finite_mask = np.isfinite(lpdf_plus_h) & np.isfinite(lpdf_minus_h) - - if np.any(finite_mask): - numerical_grad = np.zeros_like(x) - numerical_grad[finite_mask] = (lpdf_plus_h[finite_mask] - lpdf_minus_h[finite_mask]) / (2 * self.h) - - analytical_grad = dist._dlog_right_border(x) + dist = Uniform(left_border=left_border, right_border=right_border, dtype=dtype) + analytical_grad = dist._dlog_right_border(x) assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype assert analytical_grad.shape == x.shape - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) + + if dtype == np.float64: + lpdf_plus_h = Uniform(left_border=left_border, right_border=right_border + self.h).lpdf(x) + lpdf_minus_h = 
Uniform(left_border=left_border, right_border=right_border - self.h).lpdf(x) + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) @pytest.mark.parametrize( "fixed_params, expected_shape_col, expected_params", @@ -236,10 +270,10 @@ def test_dlog_right_border_numerical(self, input_data): (["left_border", "right_border"], 0, []), ], ) - def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params): + def test_log_gradients_structure(self, fixed_params, expected_shape_col, expected_params, dtype): """Tests the structure and content of log_gradients with various fixed parameters.""" - dist = Uniform(left_border=1.0, right_border=3.0) + dist = Uniform(left_border=1.0, right_border=3.0, dtype=dtype) for param in fixed_params: dist.fix_param(param) @@ -247,6 +281,7 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte gradients = dist.log_gradients(x) assert isinstance(gradients, np.ndarray) + assert gradients.dtype == dtype assert gradients.shape == (len(x), expected_shape_col) if "left_border" in expected_params: @@ -257,43 +292,44 @@ def test_log_gradients_structure(self, fixed_params, expected_shape_col, expecte np.testing.assert_allclose(gradients[:, idx], dist._dlog_right_border(x)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestUniformGenerate: """Tests for the generate method.""" - def test_generate_type_and_shape(self): + def test_generate_type_and_shape(self, dtype): """Tests that generated samples have the correct type and shape.""" np.random.seed(42) random.seed(42) - dist = Uniform(left_border=0.0, right_border=2.0) + dist = Uniform(left_border=0.0, right_border=2.0, dtype=dtype) size = 100 samples = dist.generate(size=size) assert isinstance(samples, np.ndarray) - assert samples.dtype == np.float64 + assert samples.dtype == dtype assert samples.shape == (size,) - def 
test_generate_zero_size(self): + def test_generate_zero_size(self, dtype): """Tests if the generating 0 number of samples returns an empty array""" - dist = Uniform(left_border=0.0, right_border=1.0) + dist = Uniform(left_border=0.0, right_border=1.0, dtype=dtype) assert len(dist.generate(size=0)) == 0 @pytest.mark.parametrize("size", [-1, -10]) - def test_generate_negative_size(self, size): + def test_generate_negative_size(self, size, dtype): """Tests that generating a negative number of samples raises ValueError.""" - dist = Uniform(left_border=0.0, right_border=1.0) + dist = Uniform(left_border=0.0, right_border=1.0, dtype=dtype) with pytest.raises(ValueError): dist.generate(size=size) - def test_generate_statistical_properties(self): + def test_generate_statistical_properties(self, dtype): """Tests if the generated samples have correct statistical properties (mean, variance).""" np.random.seed(123) random.seed(123) left_border, right_border = 5.0, 5.5 - dist = Uniform(left_border=left_border, right_border=right_border) + dist = Uniform(left_border=left_border, right_border=right_border, dtype=dtype) size = 20000 samples = dist.generate(size=size) @@ -301,16 +337,16 @@ def test_generate_statistical_properties(self): theoretical_mean = (right_border + left_border) / 2 theoretical_var = (right_border - left_border) ** 2 / 12 - assert np.mean(samples) == pytest.approx(theoretical_mean, rel=0.1) - assert np.var(samples) == pytest.approx(theoretical_var, rel=0.1) + assert np.mean(samples, dtype=np.float64) == pytest.approx(theoretical_mean, rel=0.1) + assert np.var(samples, dtype=np.float64) == pytest.approx(theoretical_var, rel=0.1) - def test_generate_kolmogorov_smirnov(self): + def test_generate_kolmogorov_smirnov(self, dtype): """Performs a Kolmogorov-Smirnov test to check if samples fit the distribution.""" np.random.seed(456) random.seed(456) left_border, right_border = 10.0, 12.0 - dist = Uniform(left_border=left_border, right_border=right_border) + dist = 
Uniform(left_border=left_border, right_border=right_border, dtype=dtype) size = 1000 samples = dist.generate(size=size) @@ -318,30 +354,3 @@ def test_generate_kolmogorov_smirnov(self): ks_statistic, p_value = kstest(samples, "uniform", args=(left_border, right_border - left_border)) lower_bound = 0.05 assert p_value > lower_bound - - -@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) -class TestUniformDType(DTypeHandlingMixin): - distribution_class = Uniform - default_params: ClassVar[dict] = {"left_border": 0.0, "right_border": 1.0} - - def test_init_with_dtype_sets_correct_types(self, dtype): - self.check_init_with_dtype_sets_correct_types(dtype) - - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_correct_dtype(self, size, dtype): - self.check_generate_returns_correct_dtype(size, dtype) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e3, 1e3))) - def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) - - @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) - def check_ppf_returns_correct_dtype(self, p_data, dtype): - self.check_ppf_returns_correct_dtype(p_data, dtype) - - @pytest.mark.parametrize("method_name", ["_dlog_left_border", "_dlog_right_border"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6))) - def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) diff --git a/rework_tests/unit/distributions/test_weibull.py b/rework_tests/unit/distributions/test_weibull.py index 220f9f39..b334fa6f 100644 --- a/rework_tests/unit/distributions/test_weibull.py +++ b/rework_tests/unit/distributions/test_weibull.py @@ -6,7 +6,6 @@ import random -from typing 
import ClassVar import numpy as np import pytest @@ -14,7 +13,6 @@ from hypothesis import strategies as st from hypothesis.extra.numpy import arrays from rework_pysatl_mpest.distributions import Weibull -from rework_tests.unit.distributions.test_continuous_distribution import DTypeHandlingMixin from scipy.integrate import quad from scipy.special import gamma from scipy.stats import kstest, weibull_min @@ -27,53 +25,54 @@ st_scale = st.floats(min_value=0.5, max_value=10, allow_nan=False, allow_infinity=False) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestWeibullInitialization: """Tests for the __init__ method and basic properties.""" - def test_initialization_successful(self): + def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid parameters.""" shape, loc, scale = 2.0, 0.5, 1.5 - dist = Weibull(shape=shape, loc=loc, scale=scale) - assert isinstance(dist.shape, float) - assert isinstance(dist.loc, float) - assert isinstance(dist.scale, float) - assert dist.shape == shape - assert dist.loc == loc - assert dist.scale == scale - - def test_name_property(self): + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) + assert dist.shape.dtype == dtype + assert dist.loc.dtype == dtype + assert dist.scale.dtype == dtype + assert dist.shape == dtype(shape) + assert dist.loc == dtype(loc) + assert dist.scale == dtype(scale) + + def test_name_property(self, dtype): """Tests that the name property returns the correct string.""" - dist = Weibull(shape=2.0, loc=0.0, scale=1.0) + dist = Weibull(shape=2.0, loc=0.0, scale=1.0, dtype=dtype) assert dist.name == "Weibull" - def test_params_property(self): + def test_params_property(self, dtype): """Tests that the params property returns the correct set of parameter names.""" - dist = Weibull(shape=2.0, loc=0.0, scale=1.0) + dist = Weibull(shape=2.0, loc=0.0, scale=1.0, dtype=dtype) assert dist.params == {"shape", "loc", "scale"} 
@pytest.mark.parametrize("invalid_shape", [0.0, -1.0, -10.0]) - def test_shape_invariant_violation(self, invalid_shape): + def test_shape_invariant_violation(self, invalid_shape, dtype): """Tests that initializing with a non-positive shape raises a ValueError.""" with pytest.raises(ValueError, match="Shape parameter must be positive"): - Weibull(shape=invalid_shape, loc=0.0, scale=1.0) + Weibull(shape=invalid_shape, loc=0.0, scale=1.0, dtype=dtype) @pytest.mark.parametrize("invalid_scale", [0.0, -1.0, -10.0]) - def test_scale_invariant_violation(self, invalid_scale): + def test_scale_invariant_violation(self, invalid_scale, dtype): """Tests that initializing with a non-positive scale raises a ValueError.""" with pytest.raises(ValueError, match="Scale parameter must be positive"): - Weibull(shape=1.0, loc=0.0, scale=invalid_scale) + Weibull(shape=1.0, loc=0.0, scale=invalid_scale, dtype=dtype) - def test_repr_method(self): + def test_repr_method(self, dtype): """Tests that the __repr__ method provides a reproducible string.""" - dist = Weibull(shape=1.23, loc=4.56, scale=7.89) + dist = Weibull(shape=1.23, loc=4.56, scale=7.89, dtype=dtype) repr_str = repr(dist) - assert repr_str == "Weibull(shape=1.23, loc=4.56, scale=7.89)" + assert repr_str == f"Weibull(shape={dist.shape}, loc={dist.loc}, scale={dist.scale}, dtype=np.{dtype.__name__})" recreated_dist = eval(repr_str) assert dist == recreated_dist @@ -82,18 +81,20 @@ def test_repr_method(self): class TestWeibullPDF: """Tests for the pdf method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( shape=st_shape, loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6)), ) - def test_pdf_properties(self, shape, loc, scale, x): + def test_pdf_properties(self, shape, loc, scale, x, dtype): """Tests that the PDF is non-negative and has the correct return type and shape.""" - dist = Weibull(shape=shape, loc=loc, scale=scale) + dist = 
Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) pdf_values = dist.pdf(x) assert isinstance(pdf_values, np.ndarray) + assert pdf_values.dtype == dtype assert pdf_values.shape == x.shape assert np.all(pdf_values >= 0) @@ -127,18 +128,20 @@ def test_pdf_outside_support(self, shape, loc, scale, x): class TestWeibullLPDF: """Tests for the lpdf (log-PDF) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( shape=st_shape, loc=st_loc, scale=st_scale, x=arrays(np.float64, st.integers(0, 10), elements=st.floats(-1e6, 1e6)), ) - def test_lpdf_return_type_and_shape(self, shape, loc, scale, x): + def test_lpdf_return_type_and_shape(self, shape, loc, scale, x, dtype): """Tests the return type and shape of the lpdf method.""" - dist = Weibull(shape=shape, loc=loc, scale=scale) + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) lpdf_values = dist.lpdf(x) assert isinstance(lpdf_values, np.ndarray) + assert lpdf_values.dtype == dtype assert lpdf_values.shape == x.shape @given(shape=st_shape, loc=st_loc, scale=st_scale, x=st.floats(1e-6, 1e6)) @@ -163,18 +166,20 @@ def test_lpdf_outside_support(self, shape, loc, scale, x): class TestWeibullPPF: """Tests for the ppf (Percent Point Function) method using hypothesis.""" + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) @given( shape=st_shape, loc=st_loc, scale=st_scale, p=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True)), ) - def test_ppf_return_type_and_shape(self, shape, loc, scale, p): + def test_ppf_return_type_and_shape(self, shape, loc, scale, p, dtype): """Tests the return type and shape of the ppf method.""" - dist = Weibull(shape=shape, loc=loc, scale=scale) + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) ppf_values = dist.ppf(p) assert isinstance(ppf_values, np.ndarray) + assert ppf_values.dtype == dtype assert ppf_values.shape == p.shape @given(shape=st_shape, loc=st_loc, scale=st_scale, p=st.floats(0.01, 1)) @@ 
-186,7 +191,15 @@ def test_ppf_against_scipy(self, shape, loc, scale, p): scipy_ppf = weibull_min.ppf(p, c=shape, loc=loc, scale=scale) np.testing.assert_allclose(custom_ppf, scipy_ppf, atol=1e-6) + @pytest.mark.parametrize("p_val", [-0.5, 1.1, 1.5]) + def test_ppf_invalid_input(self, p_val): + """Tests that PPF returns NaN for probabilities outside the [0, 1) range.""" + + dist = Weibull(shape=1.0, loc=0.0, scale=1.0) + assert np.isnan(dist.ppf(p_val)) + +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestWeibullGradients: """Tests for gradient calculation methods.""" @@ -198,16 +211,21 @@ class TestWeibullGradients: scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3)), ) - def test_dlog_shape_numerical(self, shape, loc, scale, x): + def test_dlog_shape_numerical(self, shape, loc, scale, x, dtype): """Checks the analytical gradient for 'shape' against a numerical approximation.""" - assume(np.all(x > loc + self.h)) - dist = Weibull(shape=shape, loc=loc, scale=scale) - lpdf_plus_h = Weibull(shape=shape + self.h, loc=loc, scale=scale).lpdf(x) - lpdf_minus_h = Weibull(shape=shape - self.h, loc=loc, scale=scale).lpdf(x) - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + assume(np.all(x > loc)) + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) analytical_grad = dist._dlog_shape(x) - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype + assert analytical_grad.shape == x.shape + + if dtype == np.float64: + lpdf_plus_h = Weibull(shape=shape + self.h, loc=loc, scale=scale).lpdf(x) + lpdf_minus_h = Weibull(shape=shape - self.h, loc=loc, scale=scale).lpdf(x) + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given( shape=st_shape, @@ -215,16 +233,21 @@ def test_dlog_shape_numerical(self, 
shape, loc, scale, x): scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3)), ) - def test_dlog_loc_numerical(self, shape, loc, scale, x): + def test_dlog_loc_numerical(self, shape, loc, scale, x, dtype): """Checks the analytical gradient for 'loc' against a numerical approximation.""" assume(np.all(x > (loc + self.h))) - dist = Weibull(shape=shape, loc=loc, scale=scale) - lpdf_plus_h = Weibull(shape=shape, loc=loc + self.h, scale=scale).lpdf(x) - lpdf_minus_h = Weibull(shape=shape, loc=loc - self.h, scale=scale).lpdf(x) - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) analytical_grad = dist._dlog_loc(x) - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) + assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype + assert analytical_grad.shape == x.shape + + if dtype == np.float64: + lpdf_plus_h = Weibull(shape=shape, loc=loc + self.h, scale=scale).lpdf(x) + lpdf_minus_h = Weibull(shape=shape, loc=loc - self.h, scale=scale).lpdf(x) + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-4, rtol=1e-3) @given( shape=st_shape, @@ -232,16 +255,21 @@ def test_dlog_loc_numerical(self, shape, loc, scale, x): scale=st_scale, x=arrays(np.float64, st.integers(1, 10), elements=st.floats(1e-3, 1e3)), ) - def test_dlog_scale_numerical(self, shape, loc, scale, x): + def test_dlog_scale_numerical(self, shape, loc, scale, x, dtype): """Checks the analytical gradient for 'scale' against a numerical approximation.""" - assume(np.all(x > loc + self.h)) - dist = Weibull(shape=shape, loc=loc, scale=scale) - lpdf_plus_h = Weibull(shape=shape, loc=loc, scale=scale + self.h).lpdf(x) - lpdf_minus_h = Weibull(shape=shape, loc=loc, scale=scale - self.h).lpdf(x) - numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + assume(np.all(x > 
loc)) + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) analytical_grad = dist._dlog_scale(x) - np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) + assert isinstance(analytical_grad, np.ndarray) + assert analytical_grad.dtype == dtype + assert analytical_grad.shape == x.shape + + if dtype == np.float64: + lpdf_plus_h = Weibull(shape=shape, loc=loc, scale=scale + self.h).lpdf(x) + lpdf_minus_h = Weibull(shape=shape, loc=loc, scale=scale - self.h).lpdf(x) + numerical_grad = (lpdf_plus_h - lpdf_minus_h) / (2 * self.h) + np.testing.assert_allclose(analytical_grad, numerical_grad, atol=1e-3, rtol=1e-3) @pytest.mark.parametrize( "fixed_params, expected_cols, expected_params", @@ -256,10 +284,10 @@ def test_dlog_scale_numerical(self, shape, loc, scale, x): (["loc", "scale", "shape"], 0, []), ], ) - def test_log_gradients_structure(self, fixed_params, expected_cols, expected_params): + def test_log_gradients_structure(self, fixed_params, expected_cols, expected_params, dtype): """Tests the structure and content of log_gradients with various fixed parameters.""" - dist = Weibull(shape=2.0, loc=1.0, scale=3.0) + dist = Weibull(shape=2.0, loc=1.0, scale=3.0, dtype=dtype) for param in fixed_params: dist.fix_param(param) @@ -267,6 +295,7 @@ def test_log_gradients_structure(self, fixed_params, expected_cols, expected_par gradients = dist.log_gradients(x) assert isinstance(gradients, np.ndarray) + assert gradients.dtype == dtype assert gradients.shape == (len(x), expected_cols) sorted_params = sorted(expected_params) @@ -278,40 +307,41 @@ def test_log_gradients_structure(self, fixed_params, expected_cols, expected_par np.testing.assert_allclose(gradients[:, sorted_params.index("shape")], dist._dlog_shape(x)) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestWeibullGenerate: """Tests for the generate method.""" - def test_generate_type_and_shape(self): + def test_generate_type_and_shape(self, dtype): """Tests that generated 
samples have the correct type and shape.""" - dist = Weibull(shape=2.0, loc=0.0, scale=1.0) + dist = Weibull(shape=2.0, loc=0.0, scale=1.0, dtype=dtype) size = 100 samples = dist.generate(size=size) assert isinstance(samples, np.ndarray) - assert samples.dtype == np.float64 + assert samples.dtype == dtype assert samples.shape == (size,) - def test_generate_zero_size(self): + def test_generate_zero_size(self, dtype): """Tests if generating 0 number of samples returns an empty array""" - dist = Weibull(shape=2.0, loc=0.0, scale=1.0) + dist = Weibull(shape=2.0, loc=0.0, scale=1.0, dtype=dtype) assert len(dist.generate(size=0)) == 0 @pytest.mark.parametrize("size", [-1, -10]) - def test_generate_negative_size(self, size): + def test_generate_negative_size(self, size, dtype): """Tests that generating a negative number of samples raises ValueError.""" - dist = Weibull(shape=2.0, loc=0.0, scale=1.0) + dist = Weibull(shape=2.0, loc=0.0, scale=1.0, dtype=dtype) with pytest.raises(ValueError): dist.generate(size=size) - def test_generate_statistical_properties(self): + def test_generate_statistical_properties(self, dtype): """Tests if the generated samples have correct statistical properties (mean, variance).""" np.random.seed(123) random.seed(123) shape, loc, scale = 2.5, 5.0, 3.0 - dist = Weibull(shape=shape, loc=loc, scale=scale) + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) size = 50000 samples = dist.generate(size=size) @@ -322,48 +352,21 @@ def test_generate_statistical_properties(self): theoretical_mean = loc + scale * g1 theoretical_var = (scale**2) * (g2 - g1**2) - assert np.mean(samples) == pytest.approx(theoretical_mean, rel=0.05) - assert np.var(samples) == pytest.approx(theoretical_var, rel=0.05) + assert np.mean(samples, dtype=np.float64) == pytest.approx(theoretical_mean, rel=0.05) + assert np.var(samples, dtype=np.float64) == pytest.approx(theoretical_var, rel=0.05) - def test_generate_kolmogorov_smirnov(self): + def 
test_generate_kolmogorov_smirnov(self, dtype): """Performs a Kolmogorov-Smirnov test to check if samples fit the distribution.""" np.random.seed(456) random.seed(456) shape, loc, scale = 3.0, 10.0, 2.0 - dist = Weibull(shape=shape, loc=loc, scale=scale) + dist = Weibull(shape=shape, loc=loc, scale=scale, dtype=dtype) size = 1000 - expected_p_value = 0.05 samples = dist.generate(size=size) # args for scipy's weibull_min are (shape, loc, scale) ks_statistic, p_value = kstest(samples, "weibull_min", args=(shape, loc, scale)) + expected_p_value = 0.05 assert p_value > expected_p_value - - -@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) -class TestWeibullDType(DTypeHandlingMixin): - distribution_class = Weibull - default_params: ClassVar[dict] = {"shape": 2.0, "loc": 0.0, "scale": 1.0} - - def test_init_with_dtype_sets_correct_types(self, dtype): - self.check_init_with_dtype_sets_correct_types(dtype) - - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_correct_dtype(self, size, dtype): - self.check_generate_returns_correct_dtype(size, dtype) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf", "log_gradients"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e3))) - def check_methods_taking_x_return_correct_dtype(self, method_name, x_data, dtype): - self.check_methods_taking_x_return_correct_dtype(method_name, x_data, dtype) - - @given(p_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1, exclude_max=True))) - def check_ppf_returns_correct_dtype(self, p_data, dtype): - self.check_ppf_returns_correct_dtype(p_data, dtype) - - @pytest.mark.parametrize("method_name", ["_dlog_shape", "_dlog_loc", "_dlog_scale"]) - @given(x_data=arrays(np.float64, st.integers(0, 10), elements=st.floats(0, 1e6))) - def test_dlog_methods_returns_correct_dtype(self, x_data, method_name, dtype): - self.check_dlog_methods_returns_correct_dtype(x_data, method_name, dtype) From 0007f284fb9018b807972b0b1372a3cac4224aa3 Mon 
Sep 17 00:00:00 2001 From: xImoZA Date: Fri, 14 Nov 2025 15:52:22 +0300 Subject: [PATCH 5/7] chore(tests): standardize mixture and parameter tests got dtype --- rework_pysatl_mpest/core/mixture.py | 4 +- rework_pysatl_mpest/distributions/uniform.py | 10 +- rework_tests/unit/core/test_mixture.py | 257 +++++++++---------- rework_tests/unit/core/test_parameter.py | 80 ++---- 4 files changed, 152 insertions(+), 199 deletions(-) diff --git a/rework_pysatl_mpest/core/mixture.py b/rework_pysatl_mpest/core/mixture.py index eeecdfbd..4c6789fb 100644 --- a/rework_pysatl_mpest/core/mixture.py +++ b/rework_pysatl_mpest/core/mixture.py @@ -134,6 +134,7 @@ def _validate_weights(self, n_components: int, weights: NDArray[DType]): @property def dtype(self) -> type[DType]: """type[DType]: The numpy data type of the mixture's outputs.""" + return self._dtype @property @@ -396,7 +397,8 @@ def _get_sorted_pairs(self, for_hashing: bool = False) -> list[tuple["Continuous if self._sorted_pairs_cache is None or for_hashing: weights_to_use = self.weights if for_hashing: - weights_to_use = np.round(weights_to_use, 8) + decimals = np.finfo(self.dtype).precision + weights_to_use = np.round(weights_to_use, decimals) pairs = sorted(zip(self.components, weights_to_use), key=lambda p: hash(p[0])) if not for_hashing: diff --git a/rework_pysatl_mpest/distributions/uniform.py b/rework_pysatl_mpest/distributions/uniform.py index 75f80ebb..03cf75cf 100644 --- a/rework_pysatl_mpest/distributions/uniform.py +++ b/rework_pysatl_mpest/distributions/uniform.py @@ -97,7 +97,7 @@ def pdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The PDF values corresponding to each point in :attr:`X`. """ X = np.asarray(X, dtype=self.dtype) @@ -128,7 +128,7 @@ def ppf(self, P): Returns ------- - NDArray[np.float64] + NDArray[DType] The PPF values corresponding to each probability in :attr:`P`. 
""" P = np.asarray(P, dtype=self.dtype) @@ -158,7 +158,7 @@ def lpdf(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] The log-PDF values corresponding to each point in :attr:`X`. """ X = np.asarray(X, dtype=self.dtype) @@ -214,7 +214,7 @@ def log_gradients(self, X): Returns ------- - NDArray[np.float64] + NDArray[DType] An array where each row corresponds to a data point in :attr:`X` and each column corresponds to the gradient with respect to a specific optimizable parameter. The order of columns corresponds @@ -247,7 +247,7 @@ def generate(self, size: int): Returns ------- - NDArray[np.float64] + NDArray[DType] A NumPy array containing the generated samples. """ diff --git a/rework_tests/unit/core/test_mixture.py b/rework_tests/unit/core/test_mixture.py index cf834996..db061b74 100644 --- a/rework_tests/unit/core/test_mixture.py +++ b/rework_tests/unit/core/test_mixture.py @@ -16,6 +16,8 @@ from rework_pysatl_mpest.core import MixtureModel from rework_pysatl_mpest.distributions import ContinuousDistribution, Exponential +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + @pytest.fixture def exp_components() -> tuple[Exponential, Exponential]: @@ -24,13 +26,15 @@ def exp_components() -> tuple[Exponential, Exponential]: return Exponential(loc=0.0, rate=1.0), Exponential(loc=5.0, rate=2.0) -@pytest.fixture -def mixture_model(exp_components: tuple[Exponential, Exponential]) -> MixtureModel: +@pytest.fixture(params=DTYPES_TO_TEST) +def mixture_model(exp_components: tuple[Exponential, Exponential], request) -> MixtureModel: """Provides a standard MixtureModel instance with two components and equal weights.""" - return MixtureModel(components=exp_components, weights=[0.5, 0.5]) + dtype = request.param + return MixtureModel(components=exp_components, weights=[0.5, 0.5], dtype=dtype) +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestMixtureModelInitialization: """Tests for the __init__ method of MixtureModel.""" @@ -41,70 +45,85 @@ class 
TestMixtureModelInitialization: [Exponential(loc=0, rate=1), Exponential(loc=1, rate=2)], ], ) - def test_init_with_valid_component_sequences(self, components_seq: Sequence[ContinuousDistribution]): + def test_init_with_valid_component_sequences(self, components_seq: Sequence[ContinuousDistribution], dtype): """Tests that MixtureModel can be initialized with different sequence types.""" - model = MixtureModel(components=components_seq) + model = MixtureModel(components=components_seq, dtype=dtype) expected_n_components = 2 assert model.n_components == expected_n_components assert isinstance(model.components, tuple) - def test_init_with_equal_weights_by_default(self, exp_components: tuple[Exponential, Exponential]): + def test_init_with_equal_weights_by_default(self, exp_components: tuple[Exponential, Exponential], dtype): """Tests that weights are distributed equally when not provided.""" - model = MixtureModel(components=exp_components) + model = MixtureModel(components=exp_components, dtype=dtype) expected_n_components = 2 assert model.n_components == expected_n_components np.testing.assert_allclose(model.weights, [0.5, 0.5]) - def test_init_with_specified_weights(self, exp_components: tuple[Exponential, Exponential]): + def test_init_with_specified_weights(self, exp_components: tuple[Exponential, Exponential], dtype): """Tests initialization with correctly specified weights.""" weights = [0.3, 0.7] - model = MixtureModel(components=exp_components, weights=weights) + model = MixtureModel(components=exp_components, weights=weights, dtype=dtype) expected_n_components = 2 assert model.n_components == expected_n_components - np.testing.assert_allclose(model.weights, weights) - np.testing.assert_allclose(np.exp(model.log_weights), weights, atol=1e-9) + if dtype == np.float64: + np.testing.assert_allclose(model.weights, weights) + np.testing.assert_allclose(np.exp(model.log_weights), weights, atol=1e-9) @pytest.mark.parametrize( "invalid_weights, error_msg", [ ([0.5, 
0.5, 0.5], "Components number \\(2\\) must be equal to weights number \\(3\\)."), ([-0.2, 1.2], "Weights must be positive."), - ([0.4, 0.4], "Sum of the weights must be equal 1, but it equal 0.8."), ], ) def test_init_with_invalid_weights_raises_value_error( - self, exp_components: tuple[Exponential, Exponential], invalid_weights: list[float], error_msg: str + self, exp_components: tuple[Exponential, Exponential], invalid_weights: list[float], error_msg: str, dtype ): """Tests that initialization with invalid weights raises a ValueError.""" with pytest.raises(ValueError, match=error_msg): - MixtureModel(components=exp_components, weights=invalid_weights) + MixtureModel(components=exp_components, weights=invalid_weights, dtype=dtype) + + @pytest.mark.parametrize( + "invalid_weights, error_msg", + [ + ([0.4, 0.4], "Sum of the weights must be equal 1, but it equal "), + ], + ) + def test_init_with_invalid_sum_of_weights_raises_value_error( + self, exp_components: tuple[Exponential, Exponential], invalid_weights: list[float], error_msg: str, dtype + ): + """Tests that initialization with invalid sum of weights raises a ValueError.""" + with pytest.raises(ValueError) as excinfo: + MixtureModel(components=exp_components, weights=invalid_weights, dtype=dtype) + + actual_error_msg = str(excinfo.value) + assert actual_error_msg.startswith(error_msg) - def test_init_with_empty_components_raises_value_error(self): + def test_init_with_empty_components_raises_value_error(self, dtype): """Tests that initialization with an empty component list raises a ValueError.""" with pytest.raises(ValueError, match="List of components cannot be an empty"): - MixtureModel(components=[]) + MixtureModel(components=[], dtype=dtype) - def test_init_casts_component_dtypes(self): + def test_init_casts_component_dtypes(self, dtype): """Tests that the MixtureModel casts all components to its own dtype during initialization.""" + comp1_f64 = Exponential(loc=0.0, rate=1.0) comp2_f64 = 
Exponential(loc=5.0, rate=2.0) assert comp1_f64.dtype == np.float64 - target_dtype = np.float32 - mixture = MixtureModel(components=[comp1_f64, comp2_f64], dtype=target_dtype) - - assert mixture.dtype == target_dtype - assert mixture.weights.dtype == target_dtype + mixture = MixtureModel(components=[comp1_f64, comp2_f64], dtype=dtype) + assert mixture.dtype == dtype + assert mixture.weights.dtype == dtype for component in mixture.components: - assert component.dtype == target_dtype + assert component.dtype == dtype for param in component.params: - assert isinstance(getattr(component, param), target_dtype) + assert isinstance(getattr(component, param), dtype) # Original components have not changed for component in [comp1_f64, comp2_f64]: @@ -112,14 +131,14 @@ def test_init_casts_component_dtypes(self): for param in component.params: assert isinstance(getattr(component, param), np.float64) - def test_init_does_not_recreate_components_with_correct_dtype(self): + def test_init_does_not_recreate_components_with_correct_dtype(self, dtype): """Tests that components with the correct dtype are not recreated.""" - target_dtype = np.float32 - comp_f32 = Exponential(loc=0.0, rate=1.0, dtype=target_dtype) + + comp_f32 = Exponential(loc=0.0, rate=1.0, dtype=dtype) original_id = id(comp_f32) - mixture = MixtureModel(components=[comp_f32], dtype=target_dtype) + mixture = MixtureModel(components=[comp_f32], dtype=dtype) assert id(mixture.components[0]) == original_id @@ -141,10 +160,11 @@ def test_components_property_is_immutable_tuple(self, mixture_model: MixtureMode with pytest.raises(TypeError): components[0] = Exponential(0, 1) # type: ignore - def test_weights_caching(self, exp_components: tuple[Exponential, Exponential]): + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) + def test_weights_caching(self, exp_components: tuple[Exponential, Exponential], dtype): """Tests the caching mechanism of the 'weights' property.""" - model = MixtureModel(components=exp_components) + 
model = MixtureModel(components=exp_components, dtype=dtype) assert model._cached_weights is None first_access_weights = model.weights @@ -164,12 +184,12 @@ def test_log_weights_setter_and_cache_invalidation(self, mixture_model: MixtureM mixture_model.log_weights = new_log_weights assert mixture_model._cached_weights is None - np.testing.assert_allclose(mixture_model.log_weights, new_log_weights) + np.testing.assert_allclose(mixture_model.log_weights, new_log_weights, atol=1e-3) new_weights = mixture_model.weights - np.testing.assert_allclose(new_weights, [0.3, 0.7]) + np.testing.assert_allclose(new_weights, [0.3, 0.7], atol=1e-3) assert id(old_weights) != id(new_weights) - assert np.isclose(np.sum(new_weights), 1.0) + assert np.isclose(np.sum(new_weights), 1.0, atol=1e-3) def test_log_weights_setter_with_invalid_shape_raises_error(self, mixture_model: MixtureModel): """Tests that setting log_weights with an incorrect shape raises a ValueError.""" @@ -177,13 +197,11 @@ def test_log_weights_setter_with_invalid_shape_raises_error(self, mixture_model: with pytest.raises(ValueError, match="The length of the new logit vector does not match"): mixture_model.log_weights = np.log([0.1, 0.2, 0.7]) - def test_properties_have_correct_dtype(self): + def test_properties_have_correct_dtype(self, mixture_model: MixtureModel): """Tests that checks the dtype of the weights and log_weights properties.""" - target_dtype = np.float32 - mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) - assert mixture.weights.dtype == target_dtype - assert mixture.log_weights.dtype == target_dtype + assert mixture_model.weights.dtype == mixture_model.dtype + assert mixture_model.log_weights.dtype == mixture_model.dtype class TestMixtureModelModification: @@ -192,8 +210,10 @@ class TestMixtureModelModification: def test_add_component(self, mixture_model: MixtureModel): """Tests adding a new component and verifies weight preservation and renormalization.""" + dtype = mixture_model.dtype + 
old_weights = mixture_model.weights.copy() - new_component = Exponential(loc=10, rate=3) + new_component = Exponential(loc=10, rate=3, dtype=dtype) new_weight = 0.4 mixture_model.add_component(new_component, weight=new_weight) @@ -217,11 +237,12 @@ def test_add_component_with_invalid_weight_raises_error(self, mixture_model: Mix with pytest.raises(ValueError, match="The weight of the new component must be in the range"): mixture_model.add_component(Exponential(10, 3), weight=invalid_weight) - def test_add_component_casts_dtype(self): + @pytest.mark.parametrize("dtype", DTYPES_TO_TEST) + def test_add_component_casts_dtype(self, dtype): """Tests that the add_component method casts the type of the new component to the dtype of the mixture.""" + comp = Exponential(loc=0.0, rate=1.0) - target_dtype = np.float32 - mixture = MixtureModel(components=[comp], dtype=target_dtype) + mixture = MixtureModel(components=[comp], dtype=dtype) new_comp_f64 = Exponential(loc=10.0, rate=2.0) assert new_comp_f64.dtype == np.float64 @@ -229,8 +250,8 @@ def test_add_component_casts_dtype(self): mixture.add_component(new_comp_f64, weight=0.3) added_component_in_mixture = mixture.components[-1] - assert added_component_in_mixture.dtype == target_dtype - assert isinstance(added_component_in_mixture.loc, target_dtype) + assert added_component_in_mixture.dtype == dtype + assert isinstance(added_component_in_mixture.loc, dtype) # Original component have not changed assert comp.dtype == np.float64 @@ -276,6 +297,7 @@ class TestMixtureModelCalculations: def test_pdf_calculation(self, mixture_model: MixtureModel, X): """Tests the PDF calculation against the definition.""" + dtype = mixture_model.dtype c1, c2 = mixture_model.components w1, w2 = mixture_model.weights @@ -283,49 +305,35 @@ def test_pdf_calculation(self, mixture_model: MixtureModel, X): calculated_pdf = mixture_model.pdf(X) assert isinstance(calculated_pdf, np.ndarray) - np.testing.assert_allclose(calculated_pdf, expected_pdf) + assert 
calculated_pdf.dtype == dtype + if dtype == np.float64: + np.testing.assert_allclose(calculated_pdf, expected_pdf) @pytest.mark.parametrize("X", [1.5, [1.5], np.array([1.0, 1.5, 6.0])]) def test_lpdf_calculation(self, mixture_model: MixtureModel, X): """Tests the LPDF calculation against the definition.""" + dtype = mixture_model.dtype + expected_lpdf = np.log(mixture_model.pdf(X)) calculated_lpdf = mixture_model.lpdf(X) assert isinstance(calculated_lpdf, np.ndarray) - np.testing.assert_allclose(calculated_lpdf, expected_lpdf) - - @pytest.mark.parametrize("method_name", ["pdf", "lpdf"]) - def test_pdf_lpdf_methods_return_correct_dtype(self, method_name: str): - """Tests that pdf and lpdf methods return arrays of the correct dtype.""" - target_dtype = np.float32 - mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) - method_to_test = getattr(mixture, method_name) - - input_x = np.array([1.0, 2.0, 3.0]) - result = method_to_test(input_x) - - assert result.dtype == target_dtype + assert calculated_lpdf.dtype == dtype + if dtype == np.float64: + np.testing.assert_allclose(calculated_lpdf, expected_lpdf) def test_loglikelihood_calculation(self, mixture_model: MixtureModel): """Tests that loglikelihood is the sum of LPDF values.""" + dtype = mixture_model.dtype X = np.array([1.0, 1.5, 6.0]) expected_loglikelihood = np.sum(mixture_model.lpdf(X)) calculated_loglikelihood = mixture_model.loglikelihood(X) - assert isinstance(calculated_loglikelihood, np.float64) + assert isinstance(calculated_loglikelihood, dtype) assert np.isclose(calculated_loglikelihood, expected_loglikelihood) - def test_loglikelihood_returns_numpy_scalar_with_correct_dtype(self): - """Tests that checks that loglikelihood returns a NumPy scalar of the correct type.""" - target_dtype = np.float32 - mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) - - loglik = mixture.loglikelihood(np.array([1, 2, 3])) - - assert isinstance(loglik, target_dtype) - class TestMixtureModelGenerate: 
"""Statistical tests for the `generate` method.""" @@ -334,9 +342,13 @@ def test_generate_returns_correct_size(self, mixture_model: MixtureModel): """Tests that generate returns an array of the requested size.""" size = 100 + dtype = mixture_model.dtype + + samples = mixture_model.generate(size=size) - assert len(mixture_model.generate(size=size)) == size - assert isinstance(mixture_model.generate(size=size), np.ndarray) + assert len(samples) == size + assert isinstance(samples, np.ndarray) + assert samples.dtype == dtype def test_generate_with_size_zero(self, mixture_model): """Tests that generating with size = 0 returns an empty array.""" @@ -381,16 +393,6 @@ def test_generate_statistical_properties(self, seed): proportion_c1 = len(samples_from_c1) / n_samples assert proportion_c1 == pytest.approx(weights[0], abs=0.05) - @pytest.mark.parametrize("size", [0, 10]) - def test_generate_returns_array_with_correct_dtype(self, size): - """Tests that generate returns an array with the correct dtype.""" - target_dtype = np.float32 - mixture = MixtureModel([Exponential(0, 1)], dtype=target_dtype) - - samples = mixture.generate(size=size) - assert samples.shape == (size,) - assert samples.dtype == target_dtype - class TestMixtureModelDunderMethods: """Tests for special (dunder) methods of MixtureModel.""" @@ -413,7 +415,8 @@ def test_getitem_retrieves_correct_component( ): """Tests that __getitem__ retrieves the correct component by index.""" - assert mixture_model[index] == exp_components[expected_component_index] + dtype = mixture_model.dtype + assert mixture_model[index] == exp_components[expected_component_index].astype(dtype) def test_getitem_out_of_bounds_raises_index_error(self, mixture_model: MixtureModel): """Tests that accessing an out-of-bounds index raises an IndexError.""" @@ -435,6 +438,9 @@ def test_iter_yields_correct_components_in_order( ): """Tests that iterating over the model yields all components in the correct order.""" + dtype = mixture_model.dtype + 
exp_components = [component.astype(dtype) for component in exp_components] + iterated_components = list(mixture_model) assert iterated_components == list(exp_components) assert all(comp_iter == comp_orig for comp_iter, comp_orig in zip(iterated_components, exp_components)) @@ -467,8 +473,8 @@ def test_copy_is_independent_weights(self, mixture_model: MixtureModel): copied_model = copy(mixture_model) copied_model.log_weights = np.log([0.1, 0.9]) - np.testing.assert_allclose(mixture_model.weights, [0.5, 0.5]) - np.testing.assert_allclose(copied_model.weights, [0.1, 0.9]) + np.testing.assert_allclose(mixture_model.weights, [0.5, 0.5], atol=1e-4) + np.testing.assert_allclose(copied_model.weights, [0.1, 0.9], atol=1e-4) def test_copy_is_independent_components(self, mixture_model: MixtureModel): """Tests that the components in the copied model are also independent copies.""" @@ -486,97 +492,92 @@ def test_copy_is_independent_components(self, mixture_model: MixtureModel): assert mixture_model.components[0].rate != copied_model.components[0].rate +@pytest.mark.parametrize("dtype", DTYPES_TO_TEST) class TestMixtureModelComparison: """Tests the __eq__ and __hash__ methods for MixtureModel.""" - def test_eq_identical_models(self): + def test_eq_identical_models(self, dtype): """Tests that two identical models are equal.""" c = [Exponential(0, 1), Exponential(10, 2)] - m1 = MixtureModel(components=c, weights=[0.4, 0.6]) - m2 = MixtureModel(components=c, weights=[0.4, 0.6]) + m1 = MixtureModel(components=c, weights=[0.4, 0.6], dtype=dtype) + m2 = MixtureModel(components=c, weights=[0.4, 0.6], dtype=dtype) assert m1 == m2 - def test_eq_order_insensitivity(self): + def test_eq_order_insensitivity(self, dtype): """Tests that models with the same components/weights in a different order are equal.""" c1 = [Exponential(0, 1), Exponential(10, 2)] w1 = [0.4, 0.6] - m1 = MixtureModel(components=c1, weights=w1) + m1 = MixtureModel(components=c1, weights=w1, dtype=dtype) c2 = 
[Exponential(10, 2), Exponential(0, 1)] w2 = [0.6, 0.4] - m2 = MixtureModel(components=c2, weights=w2) + m2 = MixtureModel(components=c2, weights=w2, dtype=dtype) assert m1 == m2 - def test_neq_different_weights(self): + def test_neq_different_weights(self, dtype): """Tests that models with different weights are not equal.""" c = [Exponential(0, 1), Exponential(10, 2)] - m1 = MixtureModel(components=c, weights=[0.4, 0.6]) - m2 = MixtureModel(components=c, weights=[0.41, 0.59]) + m1 = MixtureModel(components=c, weights=[0.4, 0.6], dtype=dtype) + m2 = MixtureModel(components=c, weights=[0.41, 0.59], dtype=dtype) assert m1 != m2 - def test_neq_different_components(self): + def test_neq_different_components(self, dtype): """Tests that models with different components are not equal.""" c1 = [Exponential(0, 1), Exponential(10, 2)] c2 = [Exponential(0, 1), Exponential(99, 2)] - m1 = MixtureModel(components=c1, weights=[0.4, 0.6]) - m2 = MixtureModel(components=c2, weights=[0.4, 0.6]) + m1 = MixtureModel(components=c1, weights=[0.4, 0.6], dtype=dtype) + m2 = MixtureModel(components=c2, weights=[0.4, 0.6], dtype=dtype) assert m1 != m2 - def test_neq_different_n_components(self): + def test_neq_different_n_components(self, dtype): """Tests that models with a different number of components are not equal.""" c1 = [Exponential(0, 1), Exponential(10, 2)] c2 = [Exponential(0, 1)] - m1 = MixtureModel(components=c1, weights=[0.4, 0.6]) - m2 = MixtureModel(components=c2, weights=[1.0]) + m1 = MixtureModel(components=c1, weights=[0.4, 0.6], dtype=dtype) + m2 = MixtureModel(components=c2, weights=[1.0], dtype=dtype) assert m1 != m2 - def test_neq_other_object(self, mixture_model, exp_components): + def test_neq_different_dtype(self, dtype): + """Tests that models with different dtype are not equal.""" + + c1 = [Exponential(0, 1), Exponential(10, 2)] + c2 = [Exponential(0, 1), Exponential(99, 2)] + m1 = MixtureModel(components=c1, weights=[0.4, 0.6], dtype=dtype) + m2 = 
MixtureModel(components=c2, weights=[0.4, 0.6], dtype=np.longdouble) + assert m1 != m2 + + def test_neq_other_object(self, mixture_model, exp_components, dtype): """Tests that a model instance is not equal to an object of a different class.""" model = mixture_model other = "not_a_mixture_model" assert model != other - def test_hash_consistency(self): + def test_hash_consistency(self, dtype): """Tests that equal models produce the same hash.""" - m1 = MixtureModel( - components=[Exponential(0, 1), Exponential(10, 2)], - weights=[0.4, 0.6], - ) - m2 = MixtureModel( - components=[Exponential(10, 2), Exponential(0, 1)], - weights=[0.6, 0.4], - ) + m1 = MixtureModel(components=[Exponential(0, 1), Exponential(10, 2)], weights=[0.4, 0.6], dtype=dtype) + m2 = MixtureModel(components=[Exponential(10, 2), Exponential(0, 1)], weights=[0.6, 0.4], dtype=dtype) assert m1 == m2 assert hash(m1) == hash(m2) - def test_hash_difference(self): + def test_hash_difference(self, dtype): """Tests that non-equal models are likely to have different hashes.""" - m1 = MixtureModel( - components=[Exponential(0, 1), Exponential(10, 2)], - weights=[0.4, 0.6], - ) - m2 = MixtureModel( - components=[Exponential(0, 1), Exponential(10, 2)], - weights=[0.5, 0.5], - ) + m1 = MixtureModel(components=[Exponential(0, 1), Exponential(10, 2)], weights=[0.4, 0.6], dtype=dtype) + m2 = MixtureModel(components=[Exponential(0, 1), Exponential(10, 2)], weights=[0.5, 0.5], dtype=dtype) assert m1 != m2 assert hash(m1) != hash(m2) - def test_hash_changes_after_modifying_weights(self): + def test_hash_changes_after_modifying_weights(self, dtype): """Tests that the hash of the model updates after its weights are changed.""" - model = MixtureModel( - components=[Exponential(0, 1), Exponential(10, 2)], - weights=[0.4, 0.6], - ) + model = MixtureModel(components=[Exponential(0, 1), Exponential(10, 2)], weights=[0.4, 0.6], dtype=dtype) old_hash = hash(model) model.log_weights = np.log([0.5, 0.5]) @@ -584,12 +585,9 @@ def
test_hash_changes_after_modifying_weights(self): assert old_hash != new_hash - def test_hash_changes_after_adding_component(self): + def test_hash_changes_after_adding_component(self, dtype): """Tests that the hash of the model updates after a new component is added.""" - model = MixtureModel( - components=[Exponential(0, 1), Exponential(10, 2)], - weights=[0.4, 0.6], - ) + model = MixtureModel(components=[Exponential(0, 1), Exponential(10, 2)], weights=[0.4, 0.6], dtype=dtype) old_hash = hash(model) model.add_component(Exponential(99, 3), weight=0.1) @@ -599,11 +597,10 @@ def test_hash_changes_after_adding_component(self): assert model.n_components == expected_n_components assert old_hash != new_hash - def test_hash_changes_after_removing_component(self): + def test_hash_changes_after_removing_component(self, dtype): """Tests that the hash of the model updates after a component is removed.""" model = MixtureModel( - components=[Exponential(0, 1), Exponential(10, 2), Exponential(99, 3)], - weights=[0.4, 0.5, 0.1], + components=[Exponential(0, 1), Exponential(10, 2), Exponential(99, 3)], weights=[0.4, 0.5, 0.1], dtype=dtype ) old_hash = hash(model) diff --git a/rework_tests/unit/core/test_parameter.py b/rework_tests/unit/core/test_parameter.py index 13fdfe84..911a7f6e 100644 --- a/rework_tests/unit/core/test_parameter.py +++ b/rework_tests/unit/core/test_parameter.py @@ -8,6 +8,8 @@ import pytest from rework_pysatl_mpest.core import Parameter +DTYPES_TO_TEST = [np.float16, np.float32, np.float64] + class _OwnerClass: """ @@ -19,49 +21,26 @@ class _OwnerClass: positive_param = Parameter(invariant=lambda x: x > 0, error_message="Value must be positive.") any_param = Parameter() - def __init__(self, positive_val: float, any_val: float): + def __init__(self, positive_val: float, any_val: float, dtype: np.floating): """ Initializes the owner class and its parameters. Also initializes a set to keep track of fixed parameters. 
""" self._fixed_params: set[str] = set() - self.positive_param = positive_val - self.any_param = any_val - - -class _OwnerClassWithDType: - """A helper class that has a dtype attribute.""" - - positive_param = Parameter(invariant=lambda x: x > 0, error_message="Value must be positive.") - any_param = Parameter() - - def __init__(self, positive_val: float, any_val: float, dtype=np.float64): - """ - Initializes the owner class and its parameters. - Also initializes a set to keep track of fixed parameters. - """ self.dtype = dtype - self._fixed_params: set[str] = set() + self.positive_param = positive_val self.any_param = any_val -@pytest.fixture -def owner_instance() -> _OwnerClass: +@pytest.fixture(params=DTYPES_TO_TEST) +def owner_instance(request) -> _OwnerClass: """ Pytest fixture to provide a clean instance of _OwnerClass for each test. """ - return _OwnerClass(positive_val=10.0, any_val=-5.0) - - -@pytest.fixture -def owner_instance_float32() -> _OwnerClassWithDType: - """ - Pytest fixture to provide a clean instance of _OwnerClassWithDType for each test. - """ - - return _OwnerClassWithDType(positive_val=10.0, any_val=-5.0, dtype=np.float32) + dtype = request.param + return _OwnerClass(positive_val=10.0, any_val=-5.0, dtype=dtype) def test_parameter_initialization(): @@ -129,15 +108,17 @@ def test_get_from_instance_returns_value(owner_instance: _OwnerClass): returns the actual float value stored in the instance. 
""" + dtype = owner_instance.dtype + positive_value = owner_instance.positive_param expected_positive_value = 10.0 any_value = owner_instance.any_param expected_any_value = -5.0 - assert isinstance(positive_value, float) - assert positive_value == expected_positive_value - assert isinstance(any_value, float) - assert any_value == expected_any_value + assert isinstance(positive_value, dtype) + assert np.isclose(positive_value, expected_positive_value, atol=1e-3) + assert isinstance(any_value, dtype) + assert np.isclose(any_value, expected_any_value, atol=1e-3) def test_set_valid_value(owner_instance: _OwnerClass): @@ -146,9 +127,12 @@ def test_set_valid_value(owner_instance: _OwnerClass): successfully assigned to the parameter. """ + dtype = owner_instance.dtype + new_positive_value = 25.5 owner_instance.positive_param = new_positive_value assert owner_instance.positive_param == new_positive_value + assert isinstance(owner_instance.positive_param, dtype) @pytest.mark.parametrize( @@ -205,33 +189,3 @@ def test_can_set_unfixed_parameter_after_fixing_another(owner_instance: _OwnerCl assert owner_instance.any_param == expected_any_value assert owner_instance.positive_param == expected_positive_value - - -def test_get_from_instance_with_dtype_returns_correct_type(owner_instance_float32: _OwnerClassWithDType): - """ - Tests that __get__ returns a value of the correct DType when the owner - instance has a `dtype` attribute. - """ - - positive_value = owner_instance_float32.positive_param - any_value = owner_instance_float32.any_param - - assert isinstance(positive_value, np.float32) - assert isinstance(any_value, np.float32) - - assert positive_value == np.float32(10.0) - assert any_value == np.float32(-5.0) - - -def test_set_and_get_with_dtype_casting(owner_instance_float32: _OwnerClassWithDType): - """ - Tests the full set -> get cycle with dtype casting. 
- """ - - new_positive_value = 123.45 - owner_instance_float32.positive_param = new_positive_value - - retrieved_value = owner_instance_float32.positive_param - - assert isinstance(retrieved_value, np.float32) - assert retrieved_value == np.float32(new_positive_value) From fcffc43da0e2a95b4854b44d91389de4c29635a7 Mon Sep 17 00:00:00 2001 From: xImoZA Date: Mon, 24 Nov 2025 16:04:59 +0300 Subject: [PATCH 6/7] chore(tests): use dynamic tolerances based on np.finfo for pdf, lpdf of MixtureModel --- rework_tests/unit/core/test_mixture.py | 33 ++++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/rework_tests/unit/core/test_mixture.py b/rework_tests/unit/core/test_mixture.py index db061b74..6a8d599d 100644 --- a/rework_tests/unit/core/test_mixture.py +++ b/rework_tests/unit/core/test_mixture.py @@ -68,9 +68,10 @@ def test_init_with_specified_weights(self, exp_components: tuple[Exponential, Ex model = MixtureModel(components=exp_components, weights=weights, dtype=dtype) expected_n_components = 2 assert model.n_components == expected_n_components - if dtype == np.float64: - np.testing.assert_allclose(model.weights, weights) - np.testing.assert_allclose(np.exp(model.log_weights), weights, atol=1e-9) + + atol = np.finfo(dtype).eps + np.testing.assert_allclose(model.weights, weights, rtol=atol) + np.testing.assert_allclose(np.exp(model.log_weights), weights, rtol=atol) @pytest.mark.parametrize( "invalid_weights, error_msg", @@ -134,11 +135,11 @@ def test_init_casts_component_dtypes(self, dtype): def test_init_does_not_recreate_components_with_correct_dtype(self, dtype): """Tests that components with the correct dtype are not recreated.""" - comp_f32 = Exponential(loc=0.0, rate=1.0, dtype=dtype) + comp = Exponential(loc=0.0, rate=1.0, dtype=dtype) - original_id = id(comp_f32) + original_id = id(comp) - mixture = MixtureModel(components=[comp_f32], dtype=dtype) + mixture = MixtureModel(components=[comp], dtype=dtype) assert 
id(mixture.components[0]) == original_id @@ -304,24 +305,26 @@ def test_pdf_calculation(self, mixture_model: MixtureModel, X): expected_pdf = w1 * c1.pdf(X) + w2 * c2.pdf(X) calculated_pdf = mixture_model.pdf(X) - assert isinstance(calculated_pdf, np.ndarray) assert calculated_pdf.dtype == dtype - if dtype == np.float64: - np.testing.assert_allclose(calculated_pdf, expected_pdf) + if not np.isscalar(X): + assert isinstance(calculated_pdf, np.ndarray) + + np.testing.assert_allclose(calculated_pdf, expected_pdf, rtol=np.finfo(dtype).eps) @pytest.mark.parametrize("X", [1.5, [1.5], np.array([1.0, 1.5, 6.0])]) def test_lpdf_calculation(self, mixture_model: MixtureModel, X): """Tests the LPDF calculation against the definition.""" dtype = mixture_model.dtype + c1, c2 = mixture_model.components + w1, w2 = mixture_model.weights - expected_lpdf = np.log(mixture_model.pdf(X)) + expected_lpdf = np.log(w1 * c1.pdf(X) + w2 * c2.pdf(X)) calculated_lpdf = mixture_model.lpdf(X) assert isinstance(calculated_lpdf, np.ndarray) assert calculated_lpdf.dtype == dtype - if dtype == np.float64: - np.testing.assert_allclose(calculated_lpdf, expected_lpdf) + np.testing.assert_allclose(calculated_lpdf, expected_lpdf, rtol=np.finfo(dtype).eps) def test_loglikelihood_calculation(self, mixture_model: MixtureModel): """Tests that loglikelihood is the sum of LPDF values.""" @@ -353,7 +356,11 @@ def test_generate_returns_correct_size(self, mixture_model: MixtureModel): def test_generate_with_size_zero(self, mixture_model): """Tests that generating with size = 0 returns an empty array.""" - assert len(mixture_model.generate(0)) == 0 + dtype = mixture_model.dtype + + samples = mixture_model.generate(0) + assert len(samples) == 0 + assert samples.dtype == dtype @pytest.mark.parametrize("size", [-1, -10]) def test_generate_with_negative_size(self, mixture_model: MixtureModel, size: int): From cd36859fb8506735cd8d66834eb3c7f447fc1a29 Mon Sep 17 00:00:00 2001 From: xImoZA Date: Mon, 24 Nov 2025 21:07:34 
+0300 Subject: [PATCH 7/7] refactor(q_function/Pareto): use generic dtype --- rework_pysatl_mpest/distributions/beta.py | 2 +- rework_pysatl_mpest/distributions/cauchy.py | 7 ++++++- rework_tests/unit/distributions/test_beta.py | 4 ++-- .../_strategies/q_function/test_generalized_q_function.py | 4 ++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/rework_pysatl_mpest/distributions/beta.py b/rework_pysatl_mpest/distributions/beta.py index a0eb41a9..c71eee43 100644 --- a/rework_pysatl_mpest/distributions/beta.py +++ b/rework_pysatl_mpest/distributions/beta.py @@ -1,4 +1,4 @@ -"""Module providing four parametric beta distribution distribution class""" +"""Module providing four parametric beta distribution class""" __author__ = "Maksim Pastukhov, Aleksandra Ri" __copyright__ = "Copyright (c) 2025 PySATL project" diff --git a/rework_pysatl_mpest/distributions/cauchy.py b/rework_pysatl_mpest/distributions/cauchy.py index ba487587..3e5b4862 100644 --- a/rework_pysatl_mpest/distributions/cauchy.py +++ b/rework_pysatl_mpest/distributions/cauchy.py @@ -152,7 +152,12 @@ def lpdf(self, X): X = np.asarray(X, dtype=self.dtype) dtype = self.dtype - return np.log(dtype(1.0)) - np.log(dtype(np.pi)) - np.log(self.scale) - np.log(dtype(1.0) + ((X - self.loc) / self.scale) ** 2) + return ( + np.log(dtype(1.0)) + - np.log(dtype(np.pi)) + - np.log(self.scale) + - np.log(dtype(1.0) + ((X - self.loc) / self.scale) ** 2) + ) def _dlog_loc(self, X): """Partial derivative of the lpdf w.r.t. the :attr:`loc` parameter. 
diff --git a/rework_tests/unit/distributions/test_beta.py b/rework_tests/unit/distributions/test_beta.py index ace54c4e..9887daff 100644 --- a/rework_tests/unit/distributions/test_beta.py +++ b/rework_tests/unit/distributions/test_beta.py @@ -78,7 +78,7 @@ def test_initialization_successful(self, dtype): """Tests that the instance is initialized correctly with valid parameters.""" shape1, shape2, left_border, right_border = 0.5, 2.0, -1.0, 1.0 - dist = Beta(alpha=shape1, beta=shape2, lower_bound=left_border, upper_bound=right_border, dtype=dtype) + dist = Beta(alpha=shape1, beta=shape2, left_border=left_border, right_border=right_border, dtype=dtype) assert dist.alpha.dtype == dtype assert dist.beta.dtype == dtype assert dist.left_border.dtype == dtype @@ -145,7 +145,7 @@ def test_invariant_bounds_violation(self, dtype): def test_repr_method(self, dtype): """Tests that the __repr__ method provides a reproducible string.""" - dist = Beta(alpha=1.1, beta=2.1, lower_bound=10.1, upper_bound=20.1, dtype=dtype) + dist = Beta(alpha=1.1, beta=2.1, left_border=10.1, right_border=20.1, dtype=dtype) repr_str = repr(dist) assert ( repr_str == f"Beta(alpha={dist.alpha}, beta={dist.beta}, left_border={dist.left_border}, " diff --git a/rework_tests/unit/estimators/iterative/_strategies/q_function/test_generalized_q_function.py b/rework_tests/unit/estimators/iterative/_strategies/q_function/test_generalized_q_function.py index ee06af8c..24345519 100644 --- a/rework_tests/unit/estimators/iterative/_strategies/q_function/test_generalized_q_function.py +++ b/rework_tests/unit/estimators/iterative/_strategies/q_function/test_generalized_q_function.py @@ -28,8 +28,8 @@ class DummyDistribution(ContinuousDistribution): param1 = Parameter() param2 = Parameter() - def __init__(self, param1: float, param2: float): - super().__init__() + def __init__(self, param1: float, param2: float, dtype: np.floating = np.float64): + super().__init__(dtype=dtype) self.param1 = param1 self.param2 = param2