diff --git a/.gitignore b/.gitignore
index 23c482eb..d8e930b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,5 @@ volestipy.egg-info
 venv
 lp_solve_5.5/
 .devcontainer/
-.github/dependabot.yml
\ No newline at end of file
+.github/dependabot.yml
+.venv/
diff --git a/.python-version b/.python-version
new file mode 100644
index 00000000..143c2f5d
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.8.19
diff --git a/dingo/PolytopeSampler.py b/dingo/PolytopeSampler.py
index 27f37853..88f673e4 100644
--- a/dingo/PolytopeSampler.py
+++ b/dingo/PolytopeSampler.py
@@ -6,10 +6,13 @@
 # Licensed under GNU LGPL.3, see LICENCE file
 
+# Contributed and/or modified by Iva Janković, as part of Google Summer of Code 2025 program.
 
 import numpy as np
 import warnings
+from typing import Optional
 import math
+import time
 from dingo.MetabolicNetwork import MetabolicNetwork
 from dingo.utils import (
     map_samples_to_steady_states,
@@ -29,12 +32,12 @@ def __init__(self, metabol_net):
             raise Exception("An unknown input object given for initialization.")
 
         self._metabolic_network = metabol_net
-        self._A = []
-        self._b = []
-        self._N = []
-        self._N_shift = []
-        self._T = []
-        self._T_shift = []
+        self._A = None
+        self._b = None
+        self._N = None
+        self._N_shift = None
+        self._T = None
+        self._T_shift = None
         self._parameters = {}
         self._parameters["nullspace_method"] = "sparseQR"
         self._parameters["opt_percentage"] = self.metabolic_network.parameters[
@@ -46,6 +49,7 @@
 
         self._parameters["tol"] = 1e-06
         self._parameters["solver"] = None
+        self._last_hpoly = None
 
     def get_polytope(self):
         """A member function to derive the corresponding full dimensional polytope
@@ -53,14 +57,15 @@
         """
         if (
-            self._A == []
-            or self._b == []
-            or self._N == []
-            or self._N_shift == []
-            or self._T == []
-            or self._T_shift == []
+            self._A is None
+            or self._b is None
+            or self._N is None
+            or self._N_shift is None
+            or self._T is None
+            or self._T_shift is None
         ):
+
             (
                 max_flux_vector,
                 max_objective,
@@ -166,7 +171,7 @@
         """A member function to sample steady states.
 
         Keyword arguments:
-        method -- An MCMC method to sample, i.e. {'billiard_walk', 'cdhr', 'rdhr', 'ball_walk', 'dikin_walk', 'john_walk', 'vaidya_walk', 'gaussian_hmc_walk', 'exponential_hmc_walk', 'hmc_leapfrog_gaussian', 'hmc_leapfrog_exponential'}
+        method -- An MCMC method to sample, i.e. {'billiard_walk', 'cdhr', 'rdhr', 'ball_walk', 'dikin_walk', 'john_walk', 'vaidya_walk', 'gaussian_hmc_walk', 'exponential_hmc_walk', 'hmc_leapfrog_gaussian', 'hmc_leapfrog_exponential', 'shake_and_bake', 'billiard_shake_and_bake'}
         n -- the number of steady states to sample
         burn_in -- the number of points to burn before sampling
         thinning -- the walk length of the chain
@@ -190,6 +195,50 @@
 
         return steady_states
 
+    def generate_steady_states_sb_once(self, n=1000, burn_in=0, sampler="sb", nreflections=None, walk_len=None):
+        """
+        Single-phase boundary sampler with clear separation of concerns.
+        - sampler: "sb"/"shake_and_bake" or "bsb"/"billiard_shake_and_bake"
+        - walk_len: defaults to floor(sqrt(d)), with a minimum of 5
+        - nreflections: only used for BSB; defaults to ceil(sqrt(d))
+
+        Returns (steady_states, diag), where diag is a dict with keys
+        'minESS', 'maxPSRF' and 'N' computed on the polytope-space samples.
+        """
+
+        self.get_polytope()
+        P = HPolytope(self._A, self._b)
+        d = int(self._A.shape[1])
+
+        wl = int(walk_len) if walk_len is not None else max(5, int(np.sqrt(d)))
+        use_bsb = str(sampler).lower() in ("bsb", "billiard_shake_and_bake")
+        nref = (
+            int(nreflections)
+            if nreflections is not None
+            else (int(np.ceil(np.sqrt(d))) if use_bsb else 0)
+        )
+
+        S = P.boundary_sample(
+            sampler="bsb" if use_bsb else "sb",
+            number_of_points=int(n),
+            number_of_points_to_burn=int(burn_in),
+            walk_len=int(wl),
+            nreflections=int(nref),
+        )
+
+        finite_cols = np.isfinite(S).all(axis=0)
+        if not finite_cols.all():
+            S = S[:, finite_cols]
+            if S.shape[1] == 0:
+                raise RuntimeError(
+                    "All sample columns contain NaN/Inf; check walk_len, nreflections, or polytope constraints."
+                )
+
+        steady_states = map_samples_to_steady_states(S, self._N, self._N_shift)
+        diag = P.boundary_diag(S)
+
+        self._last_hpoly = P  # cache if needed later
+        return steady_states, diag
+
     @staticmethod
     def sample_from_polytope(
         A, b, ess=1000, psrf=False, parallel_mmcs=False, num_threads=1, solver=None
@@ -223,7 +272,7 @@ def sample_from_polytope_no_multiphase(
 
         Keyword arguments:
         A -- an mxn matrix that contains the normal vectors of the facets of the polytope row-wise
        b -- a m-dimensional vector, s.t. A*x <= b
-        method -- An MCMC method to sample, i.e. {'billiard_walk', 'cdhr', 'rdhr', 'ball_walk', 'dikin_walk', 'john_walk', 'vaidya_walk', 'gaussian_hmc_walk', 'exponential_hmc_walk', 'hmc_leapfrog_gaussian', 'hmc_leapfrog_exponential'}
+        method -- An MCMC method to sample, i.e. {'billiard_walk', 'cdhr', 'rdhr', 'ball_walk', 'dikin_walk', 'john_walk', 'vaidya_walk', 'gaussian_hmc_walk', 'exponential_hmc_walk', 'hmc_leapfrog_gaussian', 'hmc_leapfrog_exponential', 'shake_and_bake', 'billiard_shake_and_bake'}
         n -- the number of steady states to sample
         burn_in -- the number of points to burn before sampling
         thinning -- the walk length of the chain
@@ -239,6 +288,65 @@ def sample_from_polytope_no_multiphase(
         samples_T = samples.T
         return samples_T
 
+    @staticmethod
+    def sample_from_polytope_sb_once(
+        A,
+        b,
+        n: int = 1000,
+        burn_in: int = 0,
+        sampler: str = "sb",
+        walk_len: Optional[int] = None,
+        nreflections: Optional[int] = None,
+    ):
+        """
+        Perform a single boundary-sampling phase on Ax <= b using SB or BSB.
+        Returns only the sample matrix (d x N). No timing or diagnostics inside.
+        """
+
+        A = np.ascontiguousarray(A, dtype=np.float64)
+        b = np.ascontiguousarray(b, dtype=np.float64)
+        P = HPolytope(A, b)
+        d = int(A.shape[1])
+        wl = int(walk_len) if walk_len is not None else max(5, int(np.sqrt(d)))
+        use_bsb = str(sampler).lower() in ("bsb", "billiard_shake_and_bake")
+        nref = (
+            int(nreflections)
+            if nreflections is not None
+            else (int(np.ceil(np.sqrt(d))) if use_bsb else 0)
+        )
+
+        S = P.boundary_sample(
+            sampler="bsb" if use_bsb else "sb",
+            number_of_points=int(n),
+            number_of_points_to_burn=int(burn_in),
+            walk_len=int(wl),
+            nreflections=int(nref),
+        )
+
+        finite_cols = np.isfinite(S).all(axis=0)
+        if not finite_cols.all():
+            S = S[:, finite_cols]
+            if S.shape[1] == 0:
+                raise RuntimeError(
+                    "All sample columns contain NaN/Inf; adjust walk_len, nreflections, or check constraints."
+                )
+
+        return S
+
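For context, a minimal sketch of driving the static single-phase API added above, assuming the compiled `volestipy`/`dingo` bindings are importable; the hand-built 3-cube and all parameter values are illustrative only:

```python
import numpy as np
from dingo import PolytopeSampler

# Illustrative H-polytope: the cube [-1, 1]^3, i.e. A x <= b.
A = np.vstack((np.eye(3), -np.eye(3)))
b = np.ones(6)

# One shake-and-bake phase on the boundary of Ax <= b; sampler="bsb" would
# switch to billiard shake-and-bake and default nreflections to ceil(sqrt(d)).
S = PolytopeSampler.sample_from_polytope_sb_once(A, b, n=2000, sampler="sb")
print(S.shape)  # (3, N); columns containing NaN/Inf have been dropped
```
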
+    def boundary_diagnostics(self, S):
+        """
+        Compute diagnostics (minESS, maxPSRF, N) for a given sample matrix S
+        using the current polytope stored in this sampler instance.
+        """
+        S = np.ascontiguousarray(S, dtype=np.float64)
+        P = HPolytope(self._A, self._b)
+        diag = P.boundary_diag(S)
+        return diag
+
     @staticmethod
     def round_polytope(
@@ -352,4 +460,4 @@ def set_tol(self, value):
 
     def set_opt_percentage(self, value):
-        self._parameters["opt_percentage"] = value
+        self._parameters["opt_percentage"] = value
\ No newline at end of file
diff --git a/dingo/bindings/bindings.cpp b/dingo/bindings/bindings.cpp
index cbdb1246..7d759861 100644
--- a/dingo/bindings/bindings.cpp
+++ b/dingo/bindings/bindings.cpp
@@ -6,6 +6,7 @@
 // Contributed and/or modified by Haris Zafeiropoulos
 // Contributed and/or modified by Pedro Zuidberg Dos Martires
+// Contributed and/or modified by Iva Janković, as part of Google Summer of Code 2025 program.
 
 // Licensed under GNU LGPL.3, see LICENCE file
 
@@ -14,7 +15,9 @@
 #include 
 #include "bindings.h"
 #include "hmc_sampling.h"
-
+#include "random_walks/shake_and_bake_walk.hpp"
+#include "random_walks/billiard_shake_and_bake_walk.hpp"
+#include "diagnostics/scaling_ratio.hpp"
 
 using namespace std;
 
@@ -92,7 +95,8 @@ double HPolytopeCPP::apply_sampling(int walk_len,
                                     double* samples,
                                     double variance_value,
                                     double* bias_vector_,
-                                    int ess){
+                                    int ess,
+                                    int nreflections) {
 
    RNGType rng(HP.dimension());
    HP.normalize();
@@ -167,7 +171,7 @@ double HPolytopeCPP::apply_sampling(int walk_len,
    }
    else {
-      throw std::runtime_error("This function must not be called.");
+      throw std::runtime_error(method + std::string(" is not recognized as a valid sampling method."));
    }
 
    if (strcmp(method, "mmcs") != 0) {
@@ -181,7 +185,67 @@ double HPolytopeCPP::apply_sampling(int walk_len,
    }
    return 0.0;
 }
+
+// Boundary sampling: shake-and-bake and billiard shake-and-bake
+int HPolytopeCPP::apply_boundary_sampling(int walk_len,
+                                          int number_of_points,
+                                          int number_of_points_to_burn,
+                                          const char* sampler,
+                                          int nreflections,
+                                          double* samples) {
+   RNGType rng(HP.dimension());
+   HP.normalize();
+
+   auto [boundary_pt, facet_idx] = compute_boundary_point(HP, rng, static_cast<NT>(1e-8));
+
+   const int d = HP.dimension();
+   std::list<Point> batch;
+
+   if (strcmp(sampler, "shake_and_bake") == 0 || strcmp(sampler, "sb") == 0) {
+      shakeandbake_sampling(batch, HP, rng, walk_len, number_of_points, boundary_pt, number_of_points_to_burn, facet_idx);
+
+   } else if (strcmp(sampler, "billiard_shake_and_bake") == 0 || strcmp(sampler, "bsb") == 0) {
+      billiard_shakeandbake_sampling(batch, HP, rng, walk_len, nreflections, number_of_points, boundary_pt, number_of_points_to_burn, facet_idx);
+
+   } else {
+      throw std::runtime_error(std::string(sampler) + " is not a boundary sampler.");
+   }
+
+   int n_si = 0;
+   for (auto it = batch.cbegin(); it != batch.cend(); ++it)
+      for (int j = 0; j < d; ++j)
+         samples[n_si++] = (*it)[j];
+
+   return static_cast<int>(batch.size());
+}
+
 ////////// End of "generate_samples()" //////////
+
+SBDiagnostics HPolytopeCPP::sb_diagnostics(int d, int N, const double* samples) {
+   MT S(d, N);
+   for (int c = 0; c < N; ++c)
+      for (int r = 0; r < d; ++r)
+         S(r, c) = samples[c*d + r];
+
+   unsigned int min_ess_u = 0;
+   VT ess_vec = effective_sample_size<NT, VT>(S, min_ess_u);
+   VT rhat_vec = univariate_psrf<NT, VT>(S);
+
+   SBDiagnostics out;
+   out.minESS = static_cast<double>(ess_vec.minCoeff());
+   out.maxPSRF = static_cast<double>(rhat_vec.maxCoeff());
+   out.N = N;
+   return out;
+}
+
+void HPolytopeCPP::set_sb_state_from_buffer(int 
d, int N, const double* samples, const SBDiagnostics& diag) { + sb_samples_.resize(d, N); + for (int j = 0; j < N; ++j) + for (int i = 0; i < d; ++i) + sb_samples_(i, j) = samples[i + j * d]; + + sb_diag_ = diag; +} + void HPolytopeCPP::get_polytope_as_matrices(double* new_A, double* new_b) const { @@ -428,6 +492,59 @@ void HPolytopeCPP::get_mmcs_samples(double* T_matrix, double* T_shift, double* s mmcs_set_of_parameters.samples.resize(0,0); } +void HPolytopeCPP::get_sb_samples(double* out) const { + const int d = static_cast(sb_samples_.rows()); + const int N = static_cast(sb_samples_.cols()); + for (int j = 0; j < N; ++j) + for (int i = 0; i < d; ++i) + out[i + j * d] = sb_samples_(i, j); +} + +void HPolytopeCPP::get_sb_diagnostics(double* out3) const { + out3[0] = sb_diag_.minESS; + out3[1] = sb_diag_.maxPSRF; + out3[2] = static_cast(sb_diag_.N); + +} + +void HPolytopeCPP::boundary_scaling_ratio(int d,int N,const double* samples,double tol,double min_ratio,double* scale_out,double* coverage_out,double* max_dev_out,double* avg_dev_out) const +{ + MT S(d, N); + for (int j = 0; j < N; ++j) + { + for (int i = 0; i < d; ++i) + { + S(i, j) = samples[i + j * d]; + } + } + + auto result = scaling_ratio_boundary_test(HP, S, tol, min_ratio); + const VT& scale = std::get<0>(result); + const MT& coverage = std::get<1>(result); + const VT& max_dev = std::get<2>(result); + const VT& avg_dev = std::get<3>(result); + + const int K = static_cast(scale.size()); + const int m = static_cast(coverage.rows()); + + for (int k = 0; k < K; ++k) + { + scale_out[k] = static_cast(scale[k]); + } + + for (int f = 0; f < m; ++f) + { + max_dev_out[f] = static_cast(max_dev[f]); + avg_dev_out[f] = static_cast(avg_dev[f]); + + for (int k = 0; k < K; ++k) + { + coverage_out[f * K + k] = static_cast(coverage(f, k)); + } + } +} + + ////////// Start of "rounding()" ////////// void HPolytopeCPP::apply_rounding(int rounding_method, double* new_A, double* new_b, @@ -512,3 +629,109 @@ void HPolytopeCPP::apply_rounding(int rounding_method, double* new_A, double* ne } ////////// End of "rounding()" ////////// + +////////// Known H-polytope generators wrappers ////////// + +void generate_cube_H(int dim, double scale, double* A_out, double* b_out) +{ + Hpolytope P = generate_cube(static_cast(dim), + false, // Vpoly = false + scale); + const MT& A = P.get_mat(); + const VT& b = P.get_vec(); + const int m = static_cast(A.rows()); + const int n = static_cast(A.cols()); + + for (int i = 0; i < m; ++i) { + b_out[i] = static_cast(b[i]); + for (int j = 0; j < n; ++j) { + A_out[i * n + j] = static_cast(A(i, j)); + } + } +} + +void generate_cross_H(int dim, double* A_out, double* b_out) +{ + Hpolytope P = generate_cross(static_cast(dim),false); + const MT& A = P.get_mat(); + const VT& b = P.get_vec(); + + const int m = static_cast(A.rows()); + const int n = static_cast(A.cols()); + + for (int i = 0; i < m; ++i) { + b_out[i] = static_cast(b[i]); + for (int j = 0; j < n; ++j) { + A_out[i * n + j] = static_cast(A(i, j)); + } + } +} + +void generate_simplex_H(int dim, double* A_out, double* b_out) +{ + Hpolytope P = generate_simplex(static_cast(dim),false); + const MT& A = P.get_mat(); + const VT& b = P.get_vec(); + + const int m = static_cast(A.rows()); + const int n = static_cast(A.cols()); + + for (int i = 0; i < m; ++i) { + b_out[i] = static_cast(b[i]); + for (int j = 0; j < n; ++j) { + A_out[i * n + j] = static_cast(A(i, j)); + } + } +} + +void generate_prod_simplex_H(int dim, double* A_out, double* b_out) +{ + Hpolytope P = 
generate_prod_simplex(static_cast<unsigned int>(dim), false);
+    const MT& A = P.get_mat();
+    const VT& b = P.get_vec();
+
+    const int m = static_cast<int>(A.rows());
+    const int n = static_cast<int>(A.cols());
+
+    for (int i = 0; i < m; ++i) {
+        b_out[i] = static_cast<double>(b[i]);
+        for (int j = 0; j < n; ++j) {
+            A_out[i * n + j] = static_cast<double>(A(i, j));
+        }
+    }
+}
+
+void generate_skinny_cube_H(int dim, double* A_out, double* b_out)
+{
+    Hpolytope P = generate_skinny_cube(static_cast<unsigned int>(dim), false);
+    const MT& A = P.get_mat();
+    const VT& b = P.get_vec();
+
+    const int m = static_cast<int>(A.rows());
+    const int n = static_cast<int>(A.cols());
+
+    for (int i = 0; i < m; ++i) {
+        b_out[i] = static_cast<double>(b[i]);
+        for (int j = 0; j < n; ++j) {
+            A_out[i * n + j] = static_cast<double>(A(i, j));
+        }
+    }
+}
+
+void generate_birkhoff_H(int n, double* A_out, double* b_out)
+{
+    Hpolytope P = generate_birkhoff(static_cast<unsigned int>(n));
+    const MT& A = P.get_mat();
+    const VT& b = P.get_vec();
+
+    const int m = static_cast<int>(A.rows());
+    const int d = static_cast<int>(A.cols());
+
+    for (int i = 0; i < m; ++i) {
+        b_out[i] = static_cast<double>(b[i]);
+        for (int j = 0; j < d; ++j) {
+            A_out[i * d + j] = static_cast<double>(A(i, j));
+        }
+    }
+}
+////////// Ending of known H-polytope generators wrappers //////////
\ No newline at end of file
diff --git a/dingo/bindings/bindings.h b/dingo/bindings/bindings.h
index 553ea99e..6eb3c92f 100644
--- a/dingo/bindings/bindings.h
+++ b/dingo/bindings/bindings.h
@@ -5,7 +5,8 @@
 // Copyright (c) 2018-2021 Apostolos Chalkis
 
 // Contributed and/or modified by Haris Zafeiropoulos
-// Contributed and/or modified by Pedro Zuidberg Dos Martires
+// Contributed and/or modified by Pedro Zuidberg Dos Martires
+// Contributed and/or modified by Iva Janković, as part of Google Summer of Code 2025 program.
 
 // Licensed under GNU LGPL.3, see LICENCE file
 
@@ -29,16 +30,24 @@
 #include "sampling/mmcs.hpp"
 #include "sampling/parallel_mmcs.hpp"
 #include "diagnostics/univariate_psrf.hpp"
+#include "diagnostics/effective_sample_size.hpp"
+#include "diagnostics/scaling_ratio.hpp"
 
 //from generate_samples, some extra headers not already included
 #include 
+#include 
 #include "sampling/sampling.hpp"
 #include "ode_solvers/ode_solvers.hpp"
+#include "preprocess/feasible_point.hpp"
 
 // for rounding
 #include "preprocess/min_sampling_covering_ellipsoid_rounding.hpp"
 #include "preprocess/svd_rounding.hpp"
 #include "preprocess/inscribed_ellipsoid_rounding.hpp"
+
+// for creating H polytopes
+#include "generators/known_polytope_generators.h"
 
 typedef double NT;
 typedef Cartesian Kernel;
@@ -48,6 +57,11 @@ typedef typename Hpolytope::MT MT;
 typedef typename Hpolytope::VT VT;
 typedef BoostRandomNumberGenerator RNGType;
 
+struct SBDiagnostics {
+    double minESS;
+    double maxPSRF;
+    long long N;
+};
 
 template
 struct mmcs_parameters
@@ -141,7 +155,7 @@ class HPolytopeCPP{
 
       // the apply_sampling() function
      double apply_sampling(int walk_len, int number_of_points, int number_of_points_to_burn,
                            char* method, double* inner_point, double radius, double* samples,
-                           double variance_value, double* bias_vector, int ess);
+                           double variance_value, double* bias_vector, int ess, int nreflections);
 
      void mmcs_initialize(int d, int ess, bool psrf_check, bool parallelism, int num_threads);
@@ -155,7 +169,54 @@ class HPolytopeCPP{
      void apply_rounding(int rounding_method, double* new_A, double* new_b, double* T_matrix,
                          double* shift, double &round_value, double* inner_point, double radius);
 
+     // the boundary sampling function: shake and bake, billiard shake and bake
+     int 
apply_boundary_sampling(int walk_len, int number_of_points, int number_of_points_to_burn, const char* sampler, int nreflections, double* samples);
+
+     // Compute diagnostics (minESS, maxPSRF, N) for a given sample buffer.
+     // This is a static utility function and does not depend on the internal state.
+     static SBDiagnostics sb_diagnostics(int d, int N, const double* samples);
+
+     // Return a reference to the last stored diagnostics object (legacy internal storage).
+     // Used for backward compatibility; modern code should call get_sb_diagnostics().
+     inline const SBDiagnostics& sb_diagnostics_legacy() const { return sb_diag_; }
+
+     // Return a reference to the last stored samples matrix (legacy internal storage).
+     inline const MT& sb_samples_legacy() const { return sb_samples_; }
+
+     // Set the internal Shake-and-Bake state from an external buffer.
+     // Copies samples (d x N) and diagnostic values into internal members.
+     void set_sb_state_from_buffer(int d, int N, const double* samples, const SBDiagnostics& diag);
+
+     // Copy the internally stored sample matrix (sb_samples_) into a provided output buffer.
+     // The output pointer must have enough space for d * N doubles.
+     void get_sb_samples(double* out) const;
+
+     // Copy the current diagnostics (sb_diag_) into a provided 3-element double buffer.
+     // Order: [minESS, maxPSRF, N].
+     void get_sb_diagnostics(double* out3) const;
+
+     // Compute boundary scaling-ratio diagnostics for a given sample buffer.
+     // - scale_out: length K (currently 10) scaling factors
+     // - coverage_out: m x K coverage matrix, stored row-major (facet-major)
+     // - max_dev_out: length m, maximum deviation per facet (in %)
+     // - avg_dev_out: length m, average deviation per facet (in %)
+     void boundary_scaling_ratio(int d, int N, const double* samples, double tol, double min_ratio, double* scale_out, double* coverage_out, double* max_dev_out, double* avg_dev_out) const;
+
+    private:
+     SBDiagnostics sb_diag_;
+     MT sb_samples_;
+
};
+
+// Known H-polytopes generators
+
+void generate_cube_H(int dim, double scale, double* A_out, double* b_out);
+void generate_cross_H(int dim, double* A_out, double* b_out);
+void generate_simplex_H(int dim, double* A_out, double* b_out);
+void generate_prod_simplex_H(int dim, double* A_out, double* b_out);
+void generate_skinny_cube_H(int dim, double* A_out, double* b_out);
+void generate_birkhoff_H(int n, double* A_out, double* b_out);
+
 
 #endif
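One detail worth keeping in mind when reading the declarations above and the Cython layer that follows: sample buffers are exchanged as d x N column-major (Fortran-order) arrays, so the C++ side addresses coordinate i of point j at offset `i + j*d`. A small numpy sketch of that layout equivalence (illustrative only, not part of the patch):

```python
import numpy as np

d, N = 3, 4
S = np.asfortranarray(np.arange(d * N, dtype=np.float64).reshape(d, N))

flat = S.ravel(order="F")          # the raw buffer the C++ side sees
i, j = 1, 2                        # coordinate i of sample j
assert flat[i + j * d] == S[i, j]  # column-major indexing, as in get_sb_samples()
```
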
diff --git a/dingo/volestipy.pyx b/dingo/volestipy.pyx
index 6e8d3a98..40112251 100644
--- a/dingo/volestipy.pyx
+++ b/dingo/volestipy.pyx
@@ -9,6 +9,8 @@
 
 # Licensed under GNU LGPL.3, see LICENCE file
 
+# Contributed and/or modified by Iva Janković, as part of Google Summer of Code 2025 program.
+
 #!python
 #cython: language_level=3
 #cython: boundscheck=False
@@ -42,6 +44,18 @@ def get_time_seed():
 
 # Get classes from the bindings.h file
 cdef extern from "bindings.h":
 
+    void generate_cube_H(int dim, double scale, double* A_out, double* b_out)
+    void generate_cross_H(int dim, double* A_out, double* b_out)
+    void generate_simplex_H(int dim, double* A_out, double* b_out)
+    void generate_prod_simplex_H(int dim, double* A_out, double* b_out)
+    void generate_skinny_cube_H(int dim, double* A_out, double* b_out)
+    void generate_birkhoff_H(int n, double* A_out, double* b_out)
+
+    cdef struct SBDiagnostics:
+        double minESS
+        double maxPSRF
+        long long N
+
     # The HPolytopeCPP class along with its functions
     cdef cppclass HPolytopeCPP:
@@ -55,7 +69,7 @@ cdef extern from "bindings.h":
 
         # Random sampling
         double apply_sampling(int walk_len, int number_of_points, int number_of_points_to_burn, \
                               char* method, double* inner_point, double radius, double* samples, \
-                              double variance_value, double* bias_vector, int ess)
+                              double variance_value, double* bias_vector, int ess, int nreflections)
 
         # Initialize the parameters for the (m)ultiphase (m)onte (c)arlo (s)ampling algorithm
         void mmcs_initialize(unsigned int d, int ess, int psrf_check, int parallelism, int num_threads);
@@ -72,6 +86,17 @@ cdef extern from "bindings.h":
         void apply_rounding(int rounding_method, double* new_A, double* new_b, double* T_matrix, \
                             double* shift, double &round_value, double* inner_point, double radius);
 
+        int apply_boundary_sampling(int walk_len, int number_of_points, int number_of_points_to_burn, const char* sampler, int nreflections, double* samples)
+
+        void set_sb_state_from_buffer(int d, int N, const double* samples, SBDiagnostics diag)
+        void get_sb_samples(double* out)
+        void get_sb_diagnostics(double* out3)
+        void boundary_scaling_ratio(int d, int N, const double* samples, double tol, double min_ratio, double* scale_out, double* coverage_out, double* max_dev_out, double* avg_dev_out) const
+
+        @staticmethod
+        SBDiagnostics sb_diagnostics(int d, int N, const double* samples)
+
+
     # The lowDimPolytopeCPP class along with its functions
     cdef cppclass lowDimHPolytopeCPP:
@@ -85,7 +110,7 @@ cdef extern from "bindings.h":
 
 # Lists with the methods supported by volesti for volume approximation and random walk
 volume_methods = ["sequence_of_balls".encode("UTF-8"), "cooling_gaussian".encode("UTF-8"), "cooling_balls".encode("UTF-8")]
 walk_methods = ["uniform_ball".encode("UTF-8"), "CDHR".encode("UTF-8"), "RDHR".encode("UTF-8"), "gaussian_ball".encode("UTF-8"), \
-                "gaussian_CDHR".encode("UTF-8"), "gaussian_RDHR".encode("UTF-8"), "uniform_ball".encode("UTF-8"), "billiard".encode("UTF-8")]
+                "gaussian_CDHR".encode("UTF-8"), "gaussian_RDHR".encode("UTF-8"), "uniform_ball".encode("UTF-8"), "billiard".encode("UTF-8"), "shake_and_bake".encode("UTF-8"), "billiard_shake_and_bake".encode("UTF-8")]
 rounding_methods = ["min_ellipsoid".encode("UTF-8"), "svd".encode("UTF-8"), "max_ellipsoid".encode("UTF-8")]
 
 # Build the HPolytope class
@@ -119,7 +144,7 @@ cdef class HPolytope:
 
     # Likewise, the generate_samples() function
     def generate_samples(self, method, number_of_points, number_of_points_to_burn, walk_len,
-                         variance_value, bias_vector, solver = None, ess = 1000):
+                         variance_value, bias_vector, solver = None, ess = 1000, nreflections = 0):
 
         n_variables = self._A.shape[1]
         cdef double[:,::1] samples = np.zeros((number_of_points, n_variables), dtype = np.float64, order = "C")
@@ -133,9 +158,99 @@ cdef class HPolytope:
         self.polytope_cpp.apply_sampling(walk_len, \
                                         number_of_points, number_of_points_to_burn, \
                                         method, &inner_point_for_c[0], radius, &samples[0,0], \
-                                        variance_value, &bias_vector_[0], ess)
+                                        variance_value, &bias_vector_[0], ess, nreflections)
 
        return np.asarray(samples)
 
+    def boundary_sample(self, sampler="sb", number_of_points=10000, number_of_points_to_burn=0, walk_len=1, nreflections=0):
+        cdef int d = self._A.shape[1]
+        cdef int N = number_of_points
+        cdef int burn = number_of_points_to_burn
+        cdef int wl = walk_len
+        cdef int nref = nreflections
+        cdef bytes sampler_b = sampler.encode("UTF-8")
+
+        cdef np.ndarray[np.float64_t, ndim=2, mode="fortran"] S = \
+            np.zeros((d, N), dtype=np.float64, order="F")
+
+        cdef int made = self.polytope_cpp.apply_boundary_sampling(wl, N, burn, sampler_b, nref, &S[0,0])
+        if made < 0:
+            raise RuntimeError("apply_boundary_sampling failed")
+
+        cdef SBDiagnostics diag = HPolytopeCPP.sb_diagnostics(d, made, &S[0,0])
+        self.polytope_cpp.set_sb_state_from_buffer(d, made, &S[0,0], diag)
+
+        return np.asarray(S)[:, :made]
+
+    def get_sb_diagnostics(self, out):
+        """
+        out: np.ndarray float64, shape (>=3,), order:
+        [minESS, maxPSRF, N]
+        """
+        cdef np.ndarray[np.float64_t, ndim=1] out_arr = np.ascontiguousarray(out, dtype=np.float64)
+        if out_arr.shape[0] < 3:
+            raise ValueError("out must have length >= 3")
+        self.polytope_cpp.get_sb_diagnostics(&out_arr[0])
+        return np.asarray(out_arr)
+
+    def get_sb_samples(self):
+        """
+        Returns a d x N matrix from the internal C++ sample buffer.
+        """
+        cdef int d
+        cdef long long Nll
+        cdef np.ndarray[np.float64_t, ndim=1] tmp = np.zeros(3, dtype=np.float64)
+        # Retrieve N from diagnostics (tmp = [minESS, maxPSRF, N])
+        self.polytope_cpp.get_sb_diagnostics(&tmp[0])
+        Nll = <long long> tmp[2]
+        d = self._A.shape[1]
+
+        cdef np.ndarray[np.float64_t, ndim=2, mode="fortran"] S = \
+            np.zeros((d, Nll), dtype=np.float64, order="F")
+        self.polytope_cpp.get_sb_samples(&S[0,0])
+        return np.asarray(S)
+
+    def boundary_diag(self, S):
+        """
+        Compute diagnostics directly from a given sample matrix S.
+        Returns a Python dictionary with keys: minESS, maxPSRF, and N.
+        """
+        # sb_diagnostics() reads a column-major (d x N) buffer, so hand it Fortran-ordered data.
+        cdef np.ndarray[np.float64_t, ndim=2, mode="fortran"] Snp = np.asfortranarray(S, dtype=np.float64)
+        cdef int d = Snp.shape[0]
+        cdef int N = Snp.shape[1]
+        cdef SBDiagnostics diag = HPolytopeCPP.sb_diagnostics(d, N, &Snp[0,0])
+        return {"minESS": diag.minESS, "maxPSRF": diag.maxPSRF, "N": diag.N}
+
+    def boundary_scaling_ratio(self, S, double tol=1e-10, double min_ratio=0.01):
+        """
+        Compute boundary scaling-ratio diagnostics from a sample matrix S.
+ Returns + scale : np.ndarray, shape (K,) + coverage : np.ndarray, shape (m, K) + max_dev : np.ndarray, shape (m,) + avg_dev : np.ndarray, shape (m,) + """ + cdef np.ndarray[np.float64_t, ndim=2, mode="fortran"] Snp = \ + np.array(S, dtype=np.float64, order="F") + + cdef int d = Snp.shape[0] + cdef int N = Snp.shape[1] + cdef int m = self._A.shape[0] + cdef int K = 10 # must match C++ scale(10) + + cdef np.ndarray[np.float64_t, ndim=1] scale = \ + np.zeros(K, dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] max_dev = \ + np.zeros(m, dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] avg_dev = \ + np.zeros(m, dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=2] coverage = \ + np.zeros((m, K), dtype=np.float64, order="C") + + self.polytope_cpp.boundary_scaling_ratio(d,N, &Snp[0, 0],tol,min_ratio,&scale[0],&coverage[0, 0],&max_dev[0],&avg_dev[0]) + + return (np.asarray(scale),np.asarray(coverage),np.asarray(max_dev),np.asarray(avg_dev),) # The rounding() function; as in compute_volume, more than one method is available for this step def rounding(self, rounding_method = 'john_position', solver = None): @@ -215,3 +330,112 @@ cdef class HPolytope: def dimension(self): return self._A.shape[1] + +def create_cube(int dim, double scale=1.0): + """ + Create an H-polytope for a hypercube in R^dim with side length 2*scale + centered at the origin: [-scale, scale]^dim. + + """ + cdef int m = 2 * dim + cdef int n = dim + + cdef np.ndarray[np.float64_t, ndim=2] A = \ + np.zeros((m, n), dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] b = \ + np.zeros(m, dtype=np.float64) + + generate_cube_H(dim, scale, &A[0, 0], &b[0]) + + return np.asarray(A), np.asarray(b) + + +def create_crosspolytope(int dim): + """ + Create an H-polytope for the crosspolytope (l1-ball) in R^dim. + + """ + cdef int m = 1 << dim # 2^(dim) + cdef int n = dim + + cdef np.ndarray[np.float64_t, ndim=2] A = \ + np.zeros((m, n), dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] b = \ + np.zeros(m, dtype=np.float64) + + generate_cross_H(dim, &A[0, 0], &b[0]) + + return np.asarray(A), np.asarray(b) + + +def create_simplex(int dim): + """ + Create an H-polytope for a standard simplex in R^dim. + + """ + cdef int m = dim + 1 + cdef int n = dim + + cdef np.ndarray[np.float64_t, ndim=2] A = \ + np.zeros((m, n), dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] b = \ + np.zeros(m, dtype=np.float64) + + generate_simplex_H(dim, &A[0, 0], &b[0]) + + return np.asarray(A), np.asarray(b) + + +def create_prod_simplex(int dim): + """ + Create an H-polytope for the product of two dim-dimensional simplices. + + """ + cdef int m = 2 * dim + 2 + cdef int n = 2 * dim + + cdef np.ndarray[np.float64_t, ndim=2] A = \ + np.zeros((m, n), dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] b = \ + np.zeros(m, dtype=np.float64) + + generate_prod_simplex_H(dim, &A[0, 0], &b[0]) + + return np.asarray(A), np.asarray(b) + + +def create_skinny_cube(int dim): + """ + Create an H-polytope for a 'skinny cube' in R^dim, + where the first coordinate is scaled differently. + + """ + cdef int m = 2 * dim + cdef int n = dim + + cdef np.ndarray[np.float64_t, ndim=2] A = \ + np.zeros((m, n), dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=1] b = \ + np.zeros(m, dtype=np.float64) + + generate_skinny_cube_H(dim, &A[0, 0], &b[0]) + + return np.asarray(A), np.asarray(b) + + +def create_birkhoff(int n): + """ + Creates an H-polytope for the Birkhoff polytope of size n. 
+
+    """
+    cdef int m = n * n
+    cdef int d = n * n - 2 * n + 1
+
+    cdef np.ndarray[np.float64_t, ndim=2] A = \
+        np.zeros((m, d), dtype=np.float64)
+    cdef np.ndarray[np.float64_t, ndim=1] b = \
+        np.zeros(m, dtype=np.float64)
+
+    generate_birkhoff_H(n, &A[0, 0], &b[0])
+
+    return np.asarray(A), np.asarray(b)
diff --git a/eigen b/eigen
index 02f42001..5e8edd21 160000
--- a/eigen
+++ b/eigen
@@ -1 +1 @@
-Subproject commit 02f420012a169ed9267a8a78083aaa588e713353
+Subproject commit 5e8edd21863b8321fc6b9c82322e6cc8cfc47c14
diff --git a/pyproject.toml b/pyproject.toml
index d0cdf087..f0c3cb74 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,4 +24,4 @@ networkx = "3.1"
 
 [build-system]
 requires = ["poetry-core>=1.0.0", "cython", "numpy==1.20.1"]
-build-backend = "poetry.core.masonry.api"
+build-backend = "poetry.core.masonry.api"
\ No newline at end of file
diff --git a/setup.py b/setup.py
old mode 100755
new mode 100644
diff --git a/tests/run_sb_exp.py b/tests/run_sb_exp.py
new file mode 100644
index 00000000..bb129619
--- /dev/null
+++ b/tests/run_sb_exp.py
@@ -0,0 +1,191 @@
+# VolEsti (volume computation and sampling library)
+
+# Copyright (c) 2012-2025 Vissarion Fisikopoulos
+# Copyright (c) 2018-2025 Apostolos Chalkis
+# Copyright (c) 2025-2025 Iva Janković
+
+# Contributed and/or modified by Iva Janković, as part of Google Summer of Code 2025 program.
+
+import os, sys, pickle, argparse
+import numpy as np
+from time import perf_counter
+from volestipy import HPolytope
+
+def _binary_search_k(P, S_all, L0, S_last, target_ess):
+    """Find smallest k_rel in [1, S_last.shape[1]] where minESS >= target_ess."""
+    lo, hi = 1, S_last.shape[1]
+    best_k_rel, best_diag = hi, None
+
+    def pref_diag(k_rel):
+        return P.boundary_diag(S_all[:, : L0 + k_rel])
+
+    while lo <= hi:
+        mid = (lo + hi) // 2
+        dmid = pref_diag(mid)
+        if float(dmid.get("minESS", -1.0)) >= target_ess:
+            best_k_rel, best_diag = mid, dmid
+            hi = mid - 1
+        else:
+            lo = mid + 1
+
+    if best_diag is None:
+        best_diag = P.boundary_diag(S_all[:, : L0 + best_k_rel])
+    return best_k_rel, best_diag
+
+def sample_on_polyround_processed_polytope(p, target_ess, n_chunk, max_total,
+                                           sampler, walk_len, burn_in_init):
+    name = os.path.basename(p)
+
+    with open(p, "rb") as f:
+        obj = pickle.load(f)
+    polytope = obj[0]
+
+    A = np.ascontiguousarray(polytope.A.to_numpy(), dtype=np.float64)
+    b = np.ascontiguousarray(polytope.b.to_numpy(), dtype=np.float64)
+    assert A.flags['C_CONTIGUOUS'] and b.flags['C_CONTIGUOUS']
+
+    m, n = A.shape
+    d = n
+    d_meta = getattr(polytope, "dimension", None) or getattr(polytope, "d", None)
+    print(f"m={m}, n={n}, d(from meta)={d_meta}")
+
+    # nreflections = ceil(0.25 * d) for BSB, else 0
+    if sampler.lower() == "bsb":
+        nreflections = int(np.ceil(0.25 * d))
+    else:
+        nreflections = 0
+
+    print(f"Dimensions = {d}, Constraints = {m}")
+
+    P = HPolytope(A, b)
+
+    S_parts, dur = [], []
+    total = 0
+    burn = int(burn_in_init)
+
+    while True:
+        t0 = perf_counter()
+        S_chunk = P.boundary_sample(
+            sampler=sampler,
+            number_of_points=int(n_chunk),
+            number_of_points_to_burn=int(burn),
+            walk_len=int(walk_len),
+            nreflections=int(nreflections),
+        )
+        t1 = perf_counter()
+
+        dur.append(t1 - t0)
+        S_parts.append(S_chunk)
+        total += S_chunk.shape[1]
+        burn = 0  # continue without burn-in
+
+        S_all = np.concatenate(S_parts, axis=1)
+        diag_all = P.boundary_diag(S_all)
+        minESS_all = float(diag_all.get("minESS", np.nan))
+
+        if minESS_all >= target_ess:
+            # find minimal prefix in last chunk
+            L0 = total - 
S_chunk.shape[1] + k_rel, diag_at_k = _binary_search_k(P, S_all, L0, S_chunk, target_ess) + k_abs = L0 + k_rel + + # sampling time = sum of past chunks + part of last chunk + frac = k_rel / S_chunk.shape[1] + sampling_seconds = sum(dur[:-1]) + frac * dur[-1] + chunks_used = len(dur) + + print( + f"[{name}] minESS={diag_at_k['minESS']:.3f} " + f"maxPSRF={diag_at_k.get('maxPSRF', np.nan):.3f} " + f"N={int(diag_at_k.get('N', k_abs))} " + f"chunks={chunks_used} sampling_seconds={sampling_seconds:.3f}" + ) + print(f"Total sampling time: {sampling_seconds:.2f} s") + + out_path = f"dingo_polyround_no_multiphase_{name}" + with open(out_path, "wb") as f_out: + pickle.dump( + { + "samples": S_all[:, :k_abs], + "diagnostics": { + **diag_at_k, + "N_at_threshold": int(k_abs), + "chunks": chunks_used, + "sampling_seconds": sampling_seconds, + }, + "params": { + "sampler": sampler, + "walk_len": int(walk_len), + "nreflections": int(nreflections), + "target_ess": target_ess, + "n_chunk": int(n_chunk), + "max_total": int(max_total), + "burn_in_init": int(burn_in_init), + }, + }, + f_out, + ) + print(f"Saved results to {out_path}") + return + + if total >= max_total: + sampling_seconds = sum(dur) + chunks_used = len(dur) + + print( + f"[{name}] minESS={diag_all.get('minESS', np.nan):.3f} " + f"maxPSRF={diag_all.get('maxPSRF', np.nan):.3f} " + f"N={int(diag_all.get('N', S_all.shape[1]))} " + f"chunks={chunks_used} sampling_seconds={sampling_seconds:.3f}" + ) + print(f"Total sampling time: {sampling_seconds:.2f} s (target {target_ess} not reached)") + + out_path = f"dingo_polyround_no_multiphase_{name}" + with open(out_path, "wb") as f_out: + pickle.dump( + { + "samples": S_all, + "diagnostics": { + **diag_all, + "N_total_generated": int(S_all.shape[1]), + "chunks": chunks_used, + "sampling_seconds": sampling_seconds, + }, + "params": { + "sampler": sampler, + "walk_len": int(walk_len), + "nreflections": int(nreflections), + "target_ess": target_ess, + "n_chunk": int(n_chunk), + "max_total": int(max_total), + "burn_in_init": int(burn_in_init), + }, + }, + f_out, + ) + print(f"Saved results to {out_path}") + return + +def _make_parser(): + ap = argparse.ArgumentParser() + ap.add_argument("polytope_pickle", type=str) + ap.add_argument("--target-ess", type=float, default=1000.0) + ap.add_argument("--n-chunk", type=int, default=5000) + ap.add_argument("--max-total", type=int, default=200000) + ap.add_argument("--sampler", type=str, default="bsb", choices=["sb", "bsb"]) + ap.add_argument("--walk-len", type=int, default=1) + ap.add_argument("--burn-in-init", type=int, default=0) + return ap + +if __name__ == "__main__": + parser = _make_parser() + args = parser.parse_args() + sample_on_polyround_processed_polytope( + p=args.polytope_pickle, + target_ess=args.target_ess, + n_chunk=args.n_chunk, + max_total=args.max_total, + sampler=args.sampler, + walk_len=args.walk_len, + burn_in_init=args.burn_in_init, + ) diff --git a/tests/sampling_no_multiphase.py b/tests/sampling_no_multiphase.py index 86e304f3..4bc54e52 100644 --- a/tests/sampling_no_multiphase.py +++ b/tests/sampling_no_multiphase.py @@ -76,4 +76,4 @@ def test_sample_sbml(self): if len(sys.argv) > 1: set_default_solver(sys.argv[1]) sys.argv.pop(1) - unittest.main() + unittest.main() \ No newline at end of file diff --git a/tests/shake_and_bake.py b/tests/shake_and_bake.py new file mode 100644 index 00000000..190bc9e4 --- /dev/null +++ b/tests/shake_and_bake.py @@ -0,0 +1,100 @@ +# dingo : a python library for metabolic networks sampling and analysis +# dingo 
is part of the GeomScale project
+
+import unittest
+import os
+import sys
+import time
+import warnings
+import gc
+import numpy as np
+
+from dingo import MetabolicNetwork, PolytopeSampler
+from dingo.pyoptinterface_based_impl import set_default_solver
+
+# --- environment safety ---
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["OPENBLAS_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
+
+# === Global parameters ===
+N_SAMPLES = 50000
+BURN_IN = 200
+NREFL = None  # will default to ceil(sqrt(d)) inside generate_steady_states_sb_once
+
+def _print_diag(diag: dict, elapsed_s: float, tag: str):
+    # diag carries the keys produced by boundary_diag(): minESS, maxPSRF, N
+    print(
+        f"\n[{tag}] Diagnostics:\n"
+        f"  minESS      = {diag.get('minESS')}\n"
+        f"  maxPSRF     = {diag.get('maxPSRF')}\n"
+        f"  N           = {diag.get('N')}\n"
+        f"  elapsed(py) = {elapsed_s:.6f}s\n"
+    )
+
+def _print_samples_summary(steady_states: np.ndarray, tag: str, n_dims_preview: int = 3):
+    ss_min = np.nanmin(steady_states)
+    ss_max = np.nanmax(steady_states)
+    print(f"[{tag}] Steady states summary:")
+    print(f"  shape = {steady_states.shape}, global min = {ss_min:.6g}, global max = {ss_max:.6g}")
+    d, N = steady_states.shape
+    dims = min(n_dims_preview, d)
+    for i in range(dims):
+        mean_i = np.nanmean(steady_states[i, :])
+        std_i = np.nanstd(steady_states[i, :])
+        print(f"  dim {i}: mean = {mean_i:.6g}, std = {std_i:.6g}")
+    print("")
+
+def _run_bsb_once(model: MetabolicNetwork, tc: unittest.TestCase, tag: str):
+    """Runs one Billiard Shake-and-Bake phase."""
+    sampler = PolytopeSampler(model)
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+    A, b, N, N_shift = sampler.get_polytope()
+    d_eff = A.shape[1]
+    n_rxns = N.shape[0]
+    tc.assertGreater(d_eff, 0, "Effective dimension is zero")
+
+    t0 = time.perf_counter()
+    steady_states, diag = sampler.generate_steady_states_sb_once(
+        n=N_SAMPLES, burn_in=BURN_IN, sampler="billiard_shake_and_bake", nreflections=NREFL
+    )
+    elapsed = time.perf_counter() - t0
+
+    _print_diag(diag, elapsed, f"{tag} :: BILLIARD_SHAKE_AND_BAKE")
+    _print_samples_summary(steady_states, f"{tag} :: BILLIARD_SHAKE_AND_BAKE")
+
+    # sanity
+    tc.assertEqual(steady_states.shape[0], n_rxns)
+    tc.assertTrue(np.isfinite(diag["minESS"]))
+    tc.assertGreater(diag["minESS"], 0)
+
+    del steady_states, sampler, A, b, N, N_shift
+    gc.collect()
+
+class TestBilliardShakeAndBake(unittest.TestCase):
+    def test_ecoli_core_json(self):
+        input_file = os.path.join(os.getcwd(), "ext_data", "e_coli_core.json")
+        model = MetabolicNetwork.from_json(input_file)
+        _run_bsb_once(model, self, tag="e_coli_core.json")
+
+    def test_ecoli_core_mat(self):
+        input_file = os.path.join(os.getcwd(), "ext_data", "e_coli_core.mat")
+        model = MetabolicNetwork.from_mat(input_file)
+        _run_bsb_once(model, self, tag="e_coli_core.mat")
+
+    def test_ecoli_core_sbml(self):
+        input_file = os.path.join(os.getcwd(), "ext_data", "e_coli_core.xml")
+        model = MetabolicNetwork.from_sbml(input_file)
+        _run_bsb_once(model, self, tag="e_coli_core.xml")
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        set_default_solver(sys.argv[1])
+        sys.argv.pop(1)
+    unittest.main()
diff --git a/volesti b/volesti
index c3109bba..a33e5f45 160000
--- a/volesti
+++ b/volesti
@@ -1 +1 @@
-Subproject commit c3109bba06a9b623446bdde4c5fadb02722de876
+Subproject commit a33e5f450a8badb506384d7a619344dca855fe75
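End to end, the pieces in this patch combine as in the following sketch (paths and parameter choices are illustrative; `ext_data/e_coli_core.json` is the model the tests above use):

```python
import numpy as np
from dingo import MetabolicNetwork, PolytopeSampler

model = MetabolicNetwork.from_json("ext_data/e_coli_core.json")
sampler = PolytopeSampler(model)

# One billiard shake-and-bake phase; nreflections=None falls back to ceil(sqrt(d)).
steady_states, diag = sampler.generate_steady_states_sb_once(
    n=5000, burn_in=100, sampler="bsb", nreflections=None
)
print(steady_states.shape)  # (#reactions, N)
print(diag["minESS"], diag["maxPSRF"], diag["N"])
```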