diff --git a/copulas/univariate/__init__.py b/copulas/univariate/__init__.py index 954e5ee1..477c86bc 100644 --- a/copulas/univariate/__init__.py +++ b/copulas/univariate/__init__.py @@ -3,6 +3,7 @@ from copulas.univariate.gamma import GammaUnivariate from copulas.univariate.gaussian import GaussianUnivariate from copulas.univariate.gaussian_kde import GaussianKDE +from copulas.univariate.hyper_laplace import HyperLaplace from copulas.univariate.log_laplace import LogLaplace from copulas.univariate.student_t import StudentTUnivariate from copulas.univariate.truncated_gaussian import TruncatedGaussian @@ -19,5 +20,6 @@ 'ParametricType', 'BoundedType', 'UniformUnivariate', - 'LogLaplace' + 'LogLaplace', + 'HyperLaplace' ) diff --git a/copulas/univariate/hyper_laplace.py b/copulas/univariate/hyper_laplace.py new file mode 100644 index 00000000..86431a15 --- /dev/null +++ b/copulas/univariate/hyper_laplace.py @@ -0,0 +1,300 @@ +from random import random + +import numpy as np +import scipy.stats +from scipy.special import gamma, psi + +from copulas.univariate.base import BoundedType, ParametricType, Univariate + + +def gamma_d(x): + """ + Compute the derivative of gamma function + + Arguments: + x (float) + + Returns: + float: + The derivative of gamma function at x + """ + return gamma(x) * psi(x) + + +def g3g1_over_g22(x): + """ + Compute the value of Gamma(3x)Gamma(x)/Gamma^2(2x) + + Arguments: + x (float) + + Returns: + float: + The value of Gamma(3x)Gamma(x)/Gamma^2(2x) + """ + return gamma(3 * x) * gamma(x) / gamma(2 * x) ** 2 + + +def g3g1_over_g22_d(x): + """ + Compute the derivative of Gamma(3x)Gamma(x)/Gamma^2(2x) + + Arguments: + x (float) + + Returns: + float: + The derivative of Gamma(3x)Gamma(x)/Gamma^2(2x) at x + """ + x1 = gamma(x) + x2 = gamma(2 * x) + x3 = gamma(3 * x) + + x3_d = 3 * gamma_d(3 * x) * x1 * (x2 ** 2) + x1_d = x3 * gamma_d(x) * (x2 ** 2) + x2_d = 4 * x3 * x1 * gamma_d(2 * x) * x2 + return (x3_d + x1_d - x2_d) / (x2 ** 4) + + +def solve_gamma_equation(y, num_iter=10, upper_threshold=2, lower_threshold=0.5): + """ + Solve for the equation Gamma(3x)Gamma(x)/Gamma^2(2x) = y using Newton's method + + Arguments: + y (float) + + num_iter (int): + number of iterations when using Newton's method + + upper_threshold (float): + Maximum value of x_new/x_old allowed in Newton's method. Should be > 1 + + lower_threshold (float): + Minimum value of x_new/x_old allowed in Newton's method. Should be in (0,1) + + Returns: + float: + The solution to Gamma(3x)Gamma(x)/Gamma^2(2x) = y + """ + x = 1 + for i in range(num_iter): + x_new = x + (y - g3g1_over_g22(x)) / g3g1_over_g22_d(x) + if x_new > upper_threshold * x: + x = upper_threshold * x + elif x_new < lower_threshold * x: + x = lower_threshold * x + else: + x = x_new + return x + + +class HyperLaplace(Univariate): + """ + An HyperLaplace model object, implemented via a wrapper around scipy.stats.gamma + + Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html + + Math derivation: HyperLaplace(k, alpha) = + (gamma(loc = 0, scale = 1/k, a = 1/alpha))**(1/alpha) * Unif({-1,1}) + """ + + PARAMETRIC = ParametricType.PARAMETRIC + BOUNDED = BoundedType.UNBOUNDED + + MODEL_CLASS = scipy.stats.gamma + + _params = None + _model = None + + alpha = None + k = None + + def probability_density(self, X): + """Compute the probability density for each point in X. + + Arguments: + X (numpy.ndarray): + Values for which the probability density will be computed. + It must have shape (n, 1). + + Returns: + numpy.ndarray: + Probability density values for points in X. + + Raises: + NotFittedError: + if the model is not fitted. + """ + self.check_fit() + return self._model.pdf(abs(X) ** self.alpha) / 2 * self.alpha * abs(X) ** (self.alpha - 1) + + def log_probability_density(self, X): + """Compute the log of the probability density for each point in X. + + Arguments: + X (numpy.ndarray): + Values for which the log probability density will be computed. + It must have shape (n, 1). + + Returns: + numpy.ndarray: + Log probability density values for points in X. + + Raises: + NotFittedError: + if the model is not fitted. + """ + self.check_fit() + + return np.log(self.probability_density(X)) + + def cumulative_distribution(self, X): + """Compute the cumulative distribution value for each point in X. + + Arguments: + X (numpy.ndarray): + Values for which the cumulative distribution will be computed. + It must have shape (n, 1). + + Returns: + numpy.ndarray: + Cumulative distribution values for points in X. + + Raises: + NotFittedError: + if the model is not fitted. + """ + self.check_fit() + gamma_cdf = self._model.cdf(abs(X) ** self.alpha) + for i in range(len(X)): + cd, x = gamma_cdf[i], X[i] + if x > 0: + gamma_cdf[i] = (cd + 1) / 2 + else: + gamma_cdf[i] = (1 - cd) / 2 + + return gamma_cdf + + def percent_point(self, U): + """Compute the inverse cumulative distribution value for each point in U. + + Arguments: + U (numpy.ndarray): + Values for which the cumulative distribution will be computed. + It must have shape (n, 1) and values must be in [0,1]. + + Returns: + numpy.ndarray: + Inverse cumulative distribution values for points in U. + + Raises: + NotFittedError: + if the model is not fitted. + """ + self.check_fit() + gamma_ppf = self._model.ppf(abs(U - 0.5) * 2) ** (1 / self.alpha) + for i in range(len(U)): + pp, u = gamma_ppf[i], U[i] + if u < 0.5: + gamma_ppf[i] = - pp + + return gamma_ppf + + def sample(self, n_samples=1): + """Sample values from this model. + + Argument: + n_samples (int): + Number of values to sample + + Returns: + numpy.ndarray: + Array of shape (n_samples, 1) with values randomly + sampled from this model distribution. + + Raises: + NotFittedError: + if the model is not fitted. + """ + self.check_fit() + gamma_rvs = self._model.rvs(n_samples) ** (1 / self.alpha) + for i in range(n_samples): + if random() > 0.5: + gamma_rvs[i] = - gamma_rvs[i] + return gamma_rvs + + def _fit(self, X): + """Fit the model to a non-constant random variable. + + This fitting method is implemented by matching the theoritical mean + and variance with the emperical mean and variance. + + Arguments: + X (numpy.ndarray): + Values of the random variable. It must have shape (n, 1). + """ + mean = np.mean(abs(X)) + square = np.mean(X ** 2) + a = solve_gamma_equation(square / (mean ** 2)) + self.alpha = 1 / a + self.k = (gamma(2 * a) / (gamma(a) * mean)) ** self.alpha + + self._params = { + 'loc': 0, + 'scale': 1 / self.k, + 'a': a + } + + def _get_model(self): + return self.MODEL_CLASS(**self._params) + + def _fit_constant(self, X): + self._params = { + 'loc': np.unique(X)[0], + 'scale': 0, + 'a': 1 + } + + def _is_constant(self): + return self._params['scale'] == 0 + + def fit(self, X): + """Fit the model to a random variable. + + Arguments: + X (numpy.ndarray): + Values of the random variable. It must have shape (n, 1). + """ + if self._check_constant_value(X): + self._fit_constant(X) + else: + self._fit(X) + self._model = self._get_model() + + self.fitted = True + + def _get_params(self): + """Return attributes from self._model to serialize. + + Must be implemented in all the subclasses. + + Returns: + dict: + Parameters to recreate self._model in its current fit status. + """ + return self._params.copy() + + def _set_params(self, params): + """Set the parameters of this univariate. + + Args: + params (dict): + Parameters to recreate this instance. + """ + self._params = params.copy() + self.k = 1 / self._params['scale'] + self.alpha = 1 / self._params['a'] + if self._is_constant(): + self._replace_constant_methods() + else: + self._model = self._get_model() diff --git a/tests/unit/univariate/test_base.py b/tests/unit/univariate/test_base.py index f76c3b0b..adaaf720 100644 --- a/tests/unit/univariate/test_base.py +++ b/tests/unit/univariate/test_base.py @@ -7,6 +7,7 @@ from copulas.univariate.gamma import GammaUnivariate from copulas.univariate.gaussian import GaussianUnivariate from copulas.univariate.gaussian_kde import GaussianKDE +from copulas.univariate.hyper_laplace import HyperLaplace from copulas.univariate.log_laplace import LogLaplace from copulas.univariate.student_t import StudentTUnivariate from copulas.univariate.truncated_gaussian import TruncatedGaussian @@ -29,7 +30,8 @@ def test__select_candidates(self): GammaUnivariate, StudentTUnivariate, UniformUnivariate, - LogLaplace + LogLaplace, + HyperLaplace } def test__select_candidates_parametric(self): @@ -44,7 +46,8 @@ def test__select_candidates_parametric(self): GammaUnivariate, StudentTUnivariate, UniformUnivariate, - LogLaplace + LogLaplace, + HyperLaplace } def test__select_candidates_non_parametric(self): @@ -73,7 +76,8 @@ def test__select_candidates_unbounded(self): assert set(candidates) == { GaussianKDE, GaussianUnivariate, - StudentTUnivariate + StudentTUnivariate, + HyperLaplace } def test__select_candidates_semibounded(self): diff --git a/tests/unit/univariate/test_hyper_laplace.py b/tests/unit/univariate/test_hyper_laplace.py new file mode 100644 index 00000000..7f2bf7d5 --- /dev/null +++ b/tests/unit/univariate/test_hyper_laplace.py @@ -0,0 +1,44 @@ +from unittest import TestCase + +import numpy as np +from scipy.stats import norm + +from copulas.univariate.hyper_laplace import HyperLaplace + + +class TestHyperLaplace(TestCase): + + def test__fit_constant(self): + distribution = HyperLaplace() + + distribution._fit_constant(np.array([1, 1, 1, 1])) + + assert distribution._params == { + 'loc': 1, + 'scale': 0, + 'a': 1 + } + + def test__fit(self): + distribution = HyperLaplace() + + data = norm.rvs(size=100000, loc=0, scale=1) + distribution._fit(data) + + assert distribution._params['loc'] == 0 + assert distribution._params['scale'] >= 1.90 and distribution._params['scale'] <= 2.10 + assert distribution._params['a'] >= 0.48 and distribution._params['a'] <= 0.52 + + def test__is_constant_true(self): + distribution = HyperLaplace() + + distribution.fit(np.array([1, 1, 1, 1])) + + assert distribution._is_constant() + + def test__is_constant_false(self): + distribution = HyperLaplace() + + distribution.fit(np.array([1, 2, 3, 4])) + + assert not distribution._is_constant()