Merge branch 'dev' of https://github.com/bayesflow-org/bayesflow into dev

stefanradev93 · stefanradev93 · commit 0ea79d7edf43 · 2025-07-11T09:00:04.000-04:00
diff --git a/bayesflow/adapters/transforms/broadcast.py b/bayesflow/adapters/transforms/broadcast.py
@@ -117,7 +117,7 @@ def forward(self, data: dict[str, np.ndarray], **kwargs) -> dict[str, np.ndarray
                 data[k] = np.expand_dims(data[k], axis=tuple(np.arange(0, len_diff)))
             elif self.expand == "right":
                 data[k] = np.expand_dims(data[k], axis=tuple(-np.arange(1, len_diff + 1)))
-            elif isinstance(self.expand, tuple):
+            elif isinstance(self.expand, Sequence):
                 if len(self.expand) is not len_diff:
                     raise ValueError("Length of `expand` must match the length difference of the involed arrays.")
                 data[k] = np.expand_dims(data[k], axis=self.expand)
diff --git a/bayesflow/diagnostics/plots/calibration_ecdf.py b/bayesflow/diagnostics/plots/calibration_ecdf.py
@@ -1,6 +1,7 @@
-from collections.abc import Mapping, Sequence
+from collections.abc import Callable, Mapping, Sequence
 
 import numpy as np
+import keras
 import matplotlib.pyplot as plt
 
 from ...utils.plot_utils import prepare_plot_data, add_titles_and_labels, prettify_subplots
@@ -13,6 +14,7 @@ def calibration_ecdf(
     targets: Mapping[str, np.ndarray] | np.ndarray,
     variable_keys: Sequence[str] = None,
     variable_names: Sequence[str] = None,
+    test_quantities: dict[str, Callable] = None,
     difference: bool = False,
     stacked: bool = False,
     rank_type: str | np.ndarray = "fractional",
@@ -78,6 +80,18 @@ def calibration_ecdf(
     variable_names    : list or None, optional, default: None
         The parameter names for nice plot titles.
         Inferred if None. Only relevant if `stacked=False`.
+    test_quantities   : dict or None, optional, default: None
+        A dict that maps plot titles to functions that compute
+        test quantities based on estimate/target draws.
+
+        The dict keys are automatically added to ``variable_keys``
+        and ``variable_names``.
+        Test quantity functions are expected to accept a dict of draws with
+        shape ``(batch_size, ...)`` as the first (typically only)
+        positional argument and return a NumPy array of shape
+        ``(batch_size,)``.
+        The functions do not have to deal with an additional
+        sample dimension, as appropriate reshaping is done internally.
     figsize           : tuple or None, optional, default: None
         The figure size passed to the matplotlib constructor.
         Inferred if None.
@@ -120,6 +134,36 @@ def calibration_ecdf(
         If an unknown `rank_type` is passed.
     """
 
+    # Optionally, compute and prepend test quantities from draws
+    if test_quantities is not None:
+        test_quantities_estimates = {}
+        test_quantities_targets = {}
+
+        for key, test_quantity_fn in test_quantities.items():
+            # Apply test_quantity_fn to ground-truths
+            tq_targets = test_quantity_fn(data=targets)
+            test_quantities_targets[key] = np.expand_dims(tq_targets, axis=1)
+
+            # Flatten estimates for batch processing in test_quantity_fn, apply function, and restore shape
+            num_conditions, num_samples = next(iter(estimates.values())).shape[:2]
+            flattened_estimates = keras.tree.map_structure(lambda t: np.reshape(t, (-1, *t.shape[2:])), estimates)
+            flat_tq_estimates = test_quantity_fn(data=flattened_estimates)
+            test_quantities_estimates[key] = np.reshape(flat_tq_estimates, (num_conditions, num_samples, 1))
+
+        # Add custom test quantities to variable keys and names for plotting
+        # keys and names are set to the test_quantities dict keys
+        test_quantities_names = list(test_quantities.keys())
+
+        if variable_keys is None:
+            variable_keys = list(estimates.keys())
+
+        if isinstance(variable_names, list):
+            variable_names = test_quantities_names + variable_names
+
+        variable_keys = test_quantities_names + variable_keys
+        estimates = test_quantities_estimates | estimates
+        targets = test_quantities_targets | targets
+
     plot_data = prepare_plot_data(
         estimates=estimates,
         targets=targets,
diff --git a/bayesflow/experimental/cif/cif.py b/bayesflow/experimental/cif/cif.py
@@ -99,7 +99,7 @@ def _inverse(
     def compute_metrics(self, x: Tensor, conditions: Tensor = None, stage: str = "training") -> dict[str, Tensor]:
         base_metrics = super().compute_metrics(x, conditions=conditions, stage=stage)
 
-        elbo = self.log_prob(x, conditions=conditions)
+        elbo = self.log_prob(x, conditions=conditions, training=stage == "training")
 
         loss = -keras.ops.mean(elbo)
 
diff --git a/bayesflow/networks/coupling_flow/coupling_flow.py b/bayesflow/networks/coupling_flow/coupling_flow.py
@@ -183,7 +183,7 @@ def compute_metrics(
     ) -> dict[str, Tensor]:
         base_metrics = super().compute_metrics(x, conditions=conditions, stage=stage)
 
-        z, log_density = self(x, conditions=conditions, inverse=False, density=True)
+        z, log_density = self(x, conditions=conditions, inverse=False, density=True, training=stage == "training")
         loss = weighted_mean(-log_density, sample_weight)
 
         return base_metrics | {"loss": loss}
diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
@@ -145,7 +145,7 @@ def call(
     def compute_metrics(
         self, x: Tensor, conditions: Tensor = None, sample_weight: Tensor = None, stage: str = "training"
     ) -> dict[str, Tensor]:
-        output = self(x, conditions)
+        output = self(x, conditions, training=stage == "training")
 
         metrics = {}
         # calculate negative score as mean over all scores
diff --git a/bayesflow/utils/dict_utils.py b/bayesflow/utils/dict_utils.py
@@ -282,6 +282,10 @@ def dicts_to_arrays(
         Ground-truth values corresponding to the estimates. Must match the structure and dimensionality
         of `estimates` in terms of first and last axis.
 
+    priors : dict[str, ndarray] or ndarray, optional (default = None)
+        Prior draws. Must match the structure and dimensionality
+        of `estimates` in terms of first and last axis.
+
     dataset_ids : Sequence of integers indexing the datasets to select (default = None).
         By default, use all datasets.
 
diff --git a/bayesflow/utils/optimal_transport/log_sinkhorn.py b/bayesflow/utils/optimal_transport/log_sinkhorn.py
@@ -8,10 +8,10 @@
 def log_sinkhorn(x1, x2, seed: int = None, **kwargs):
     """
     Log-stabilized version of :py:func:`~bayesflow.utils.optimal_transport.sinkhorn.sinkhorn`.
-    Significantly slower than the unstabilized version, so use only when you need numerical stability.
+    About 50% slower than the unstabilized version, so use only when you need numerical stability.
     """
     log_plan = log_sinkhorn_plan(x1, x2, **kwargs)
-    assignments = keras.random.categorical(keras.ops.exp(log_plan), num_samples=1, seed=seed)
+    assignments = keras.random.categorical(log_plan, num_samples=1, seed=seed)
     assignments = keras.ops.squeeze(assignments, axis=1)
 
     return assignments
@@ -20,19 +20,25 @@ def log_sinkhorn(x1, x2, seed: int = None, **kwargs):
 def log_sinkhorn_plan(x1, x2, regularization: float = 1.0, rtol=1e-5, atol=1e-8, max_steps=None):
     """
     Log-stabilized version of :py:func:`~bayesflow.utils.optimal_transport.sinkhorn.sinkhorn_plan`.
-    Significantly slower than the unstabilized version, so use only when you need numerical stability.
+    About 50% slower than the unstabilized version, so use primarily when you need numerical stability.
     """
     cost = euclidean(x1, x2)
+    cost_scaled = -cost / regularization
 
-    log_plan = cost / -(regularization * keras.ops.mean(cost) + 1e-16)
+    # initialize transport plan from a gaussian kernel
+    log_plan = cost_scaled - keras.ops.max(cost_scaled)
+    n, m = keras.ops.shape(log_plan)
+
+    log_a = -keras.ops.log(n)
+    log_b = -keras.ops.log(m)
 
     def contains_nans(plan):
         return keras.ops.any(keras.ops.isnan(plan))
 
     def is_converged(plan):
-        # for convergence, the plan should be doubly stochastic
-        conv0 = keras.ops.all(keras.ops.isclose(keras.ops.logsumexp(plan, axis=0), 0.0, rtol=rtol, atol=atol))
-        conv1 = keras.ops.all(keras.ops.isclose(keras.ops.logsumexp(plan, axis=1), 0.0, rtol=rtol, atol=atol))
+        # for convergence, the target marginals must match
+        conv0 = keras.ops.all(keras.ops.isclose(keras.ops.logsumexp(plan, axis=0), log_b, rtol=0.0, atol=rtol + atol))
+        conv1 = keras.ops.all(keras.ops.isclose(keras.ops.logsumexp(plan, axis=1), log_a, rtol=0.0, atol=rtol + atol))
         return conv0 & conv1
 
     def cond(_, plan):
@@ -41,8 +47,8 @@ def cond(_, plan):
 
     def body(steps, plan):
         # Sinkhorn-Knopp: repeatedly normalize the transport plan along each dimension
-        plan = keras.ops.log_softmax(plan, axis=0)
-        plan = keras.ops.log_softmax(plan, axis=1)
+        plan = plan - keras.ops.logsumexp(plan, axis=0, keepdims=True) + log_b
+        plan = plan - keras.ops.logsumexp(plan, axis=1, keepdims=True) + log_a
 
         return steps + 1, plan
 
diff --git a/bayesflow/utils/optimal_transport/sinkhorn.py b/bayesflow/utils/optimal_transport/sinkhorn.py
@@ -11,7 +11,7 @@ def sinkhorn(x1: Tensor, x2: Tensor, seed: int = None, **kwargs) -> (Tensor, Ten
     """
     Matches elements from x2 onto x1 using the Sinkhorn-Knopp algorithm.
 
-    Sinkhorn-Knopp is an iterative algorithm that repeatedly normalizes the cost matrix into a doubly stochastic
+    Sinkhorn-Knopp is an iterative algorithm that repeatedly normalizes the cost matrix into a
     transport plan, containing assignment probabilities.
     The permutation is then sampled randomly according to the transport plan.
 
@@ -27,12 +27,15 @@ def sinkhorn(x1: Tensor, x2: Tensor, seed: int = None, **kwargs) -> (Tensor, Ten
     :param seed: Random seed to use for sampling indices.
         Default: None, which means the seed will be auto-determined for non-compiled contexts.
 
-    :return: Tensor of shape (m,)
+    :return: Tensor of shape (n,)
         Assignment indices for x2.
 
     """
     plan = sinkhorn_plan(x1, x2, **kwargs)
-    assignments = keras.random.categorical(plan, num_samples=1, seed=seed)
+
+    # we sample from log(plan) to receive assignments of length n, corresponding to indices of x2
+    # such that x2[assignments] matches x1
+    assignments = keras.random.categorical(keras.ops.log(plan), num_samples=1, seed=seed)
     assignments = keras.ops.squeeze(assignments, axis=1)
 
     return assignments
@@ -42,7 +45,7 @@ def sinkhorn_plan(
     x1: Tensor,
     x2: Tensor,
     regularization: float = 1.0,
-    max_steps: int = 10_000,
+    max_steps: int = None,
     rtol: float = 1e-5,
     atol: float = 1e-8,
 ) -> Tensor:
@@ -59,7 +62,7 @@ def sinkhorn_plan(
         Controls the standard deviation of the Gaussian kernel.
 
     :param max_steps: Maximum number of iterations, or None to run until convergence.
-        Default: 10_000
+        Default: None
 
     :param rtol: Relative tolerance for convergence.
         Default: 1e-5.
@@ -71,17 +74,20 @@ def sinkhorn_plan(
         The transport probabilities.
     """
     cost = euclidean(x1, x2)
+    cost_scaled = -cost / regularization
 
-    # initialize the transport plan from a gaussian kernel
-    plan = keras.ops.exp(cost / -(regularization * keras.ops.mean(cost) + 1e-16))
+    # initialize transport plan from a gaussian kernel
+    # (more numerically stable version of keras.ops.exp(-cost/regularization))
+    plan = keras.ops.exp(cost_scaled - keras.ops.max(cost_scaled))
+    n, m = keras.ops.shape(cost)
 
     def contains_nans(plan):
         return keras.ops.any(keras.ops.isnan(plan))
 
     def is_converged(plan):
-        # for convergence, the plan should be doubly stochastic
-        conv0 = keras.ops.all(keras.ops.isclose(keras.ops.sum(plan, axis=0), 1.0, rtol=rtol, atol=atol))
-        conv1 = keras.ops.all(keras.ops.isclose(keras.ops.sum(plan, axis=1), 1.0, rtol=rtol, atol=atol))
+        # for convergence, the target marginals must match
+        conv0 = keras.ops.all(keras.ops.isclose(keras.ops.sum(plan, axis=0), 1.0 / m, rtol=rtol, atol=atol))
+        conv1 = keras.ops.all(keras.ops.isclose(keras.ops.sum(plan, axis=1), 1.0 / n, rtol=rtol, atol=atol))
         return conv0 & conv1
 
     def cond(_, plan):
@@ -90,8 +96,8 @@ def cond(_, plan):
 
     def body(steps, plan):
         # Sinkhorn-Knopp: repeatedly normalize the transport plan along each dimension
-        plan = keras.ops.softmax(plan, axis=0)
-        plan = keras.ops.softmax(plan, axis=1)
+        plan = plan / keras.ops.sum(plan, axis=0, keepdims=True) * (1.0 / m)
+        plan = plan / keras.ops.sum(plan, axis=1, keepdims=True) * (1.0 / n)
 
         return steps + 1, plan
 
diff --git a/bayesflow/utils/plot_utils.py b/bayesflow/utils/plot_utils.py
@@ -23,7 +23,7 @@ def prepare_plot_data(
     figsize: tuple = None,
     stacked: bool = False,
     default_name: str = "v",
-) -> Mapping[str, Any]:
+) -> dict[str, Any]:
     """
     Procedural wrapper that encompasses all preprocessing steps, including shape-checking, parameter name
     generation, layout configuration, figure initialization, and collapsing of axes.
@@ -56,6 +56,12 @@ def prepare_plot_data(
         Whether the plots are stacked horizontally
     default_name      : str, optional (default = "v")
         The default name to use for estimates if None provided
+
+    Returns
+    -------
+    plot_data : dict[str, Any]
+        A dictionary containing all preprocessed data and plotting objects required for visualization,
+        including estimates, targets, variable names, figure, axes, and layout configuration.
     """
 
     plot_data = dicts_to_arrays(
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,16 +36,16 @@ dependencies = [
 [project.optional-dependencies]
 all = [
     # dev
+    "ipython",
+    "ipykernel",
     "jupyter",
     "jupyterlab",
+    "line-profiler",
     "nbconvert",
-    "ipython",
-    "ipykernel",
     "pre-commit",
     "ruff",
     "tox",
     # docs
-
     "myst-nb ~= 1.2",
     "numpydoc ~= 1.8",
     "pydata-sphinx-theme ~= 0.16",
@@ -63,6 +63,7 @@ all = [
 dev = [
     "jupyter",
     "jupyterlab",
+    "line-profiler",
     "pre-commit",
     "ruff",
     "tox",
diff --git a/tests/test_diagnostics/test_diagnostics_plots.py b/tests/test_diagnostics/test_diagnostics_plots.py
@@ -1,4 +1,5 @@
 import bayesflow as bf
+import numpy as np
 import pytest
 
 
@@ -16,6 +17,8 @@ def test_backend():
 
 
 def test_calibration_ecdf(random_estimates, random_targets, var_names):
+    print(random_estimates, random_targets, var_names)
+
     # basic functionality: automatic variable names
     out = bf.diagnostics.plots.calibration_ecdf(random_estimates, random_targets)
     assert len(out.axes) == num_variables(random_estimates)
@@ -46,6 +49,22 @@ def test_calibration_ecdf(random_estimates, random_targets, var_names):
     # cannot infer the variable names from an array so default names are used
     assert out.axes[1].title._text == "v_1"
 
+    # test quantities plots are shown
+    test_quantities = {
+        r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1),
+        r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1),
+    }
+    out = bf.diagnostics.plots.calibration_ecdf(random_estimates, random_targets, test_quantities=test_quantities)
+    assert len(out.axes) == len(test_quantities) + num_variables(random_estimates)
+    assert out.axes[1].title._text == r"$\beta_1 \cdot \beta_2$"
+    assert out.axes[-1].title._text == r"sigma"
+
+    # test plot titles changed to variable_names in case test quantities exist
+    out = bf.diagnostics.plots.calibration_ecdf(
+        random_estimates, random_targets, test_quantities=test_quantities, variable_names=var_names
+    )
+    assert out.axes[-1].title._text == r"$\sigma$"
+
 
 def test_calibration_histogram(random_estimates, random_targets):
     # basic functionality: automatic variable names
diff --git a/tests/test_utils/test_optimal_transport.py b/tests/test_utils/test_optimal_transport.py