bayesflow-org
diff --git a/bayesflow/adapters/transforms/standardize.py b/bayesflow/adapters/transforms/standardize.py
Lines changed: 1 addition & 1 deletion
diff --git a/bayesflow/approximators/continuous_approximator.py b/bayesflow/approximators/continuous_approximator.py
Lines changed: 1 addition & 1 deletion
diff --git a/bayesflow/datasets/__init__.py b/bayesflow/datasets/__init__.py
Lines changed: 0 additions & 1 deletion
diff --git a/bayesflow/datasets/disk_dataset.py b/bayesflow/datasets/disk_dataset.py
Lines changed: 52 additions & 6 deletions
diff --git a/bayesflow/datasets/offline_dataset.py b/bayesflow/datasets/offline_dataset.py
Lines changed: 60 additions & 2 deletions
diff --git a/bayesflow/datasets/online_dataset.py b/bayesflow/datasets/online_dataset.py
Lines changed: 56 additions & 1 deletion
diff --git a/bayesflow/datasets/rounds_dataset.py b/bayesflow/datasets/rounds_dataset.py
Lines changed: 0 additions & 66 deletions
diff --git a/bayesflow/experimental/__init__.py b/bayesflow/experimental/__init__.py
Lines changed: 2 additions & 1 deletion
diff --git a/bayesflow/experimental/diffusion_model/__init__.py b/bayesflow/experimental/diffusion_model/__init__.py
Lines changed: 9 additions & 0 deletions
@@ -123,7 +123,7 @@ def inverse(self, data: np.ndarray, **kwargs) -> np.ndarray:
 
     def log_det_jac(self, data, inverse: bool = False, **kwargs) -> np.ndarray:
         std = np.broadcast_to(self.std, data.shape)
-        ldj = np.log(np.abs(std))
+        ldj = -np.log(np.abs(std))
         if inverse:
             ldj = -ldj
         return np.sum(ldj, axis=tuple(range(1, ldj.ndim)))
@@ -458,7 +458,7 @@ def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray | dic
         # change of variables formula
         log_det_jac = log_det_jac.get("inference_variables")
         if log_det_jac is not None:
-            log_prob = log_prob + log_det_jac
+            log_prob = keras.tree.map_structure(lambda x: x + log_det_jac, log_prob)
 
         return log_prob
 
 
@@ -7,7 +7,6 @@
 from .offline_dataset import OfflineDataset
 from .online_dataset import OnlineDataset
 from .disk_dataset import DiskDataset
-from .rounds_dataset import RoundsDataset
 
 from ..utils._docs import _add_imports_to_all
 
 
@@ -1,8 +1,12 @@
-import keras
-import numpy as np
+from collections.abc import Mapping, Callable
+
 import os
 import pathlib as pl
 
+import numpy as np
+
+import keras
+
 from bayesflow.adapters import Adapter
 from bayesflow.utils import tree_stack, pickle_load
 
@@ -29,11 +33,43 @@ def __init__(
         *,
         pattern: str = "*.pkl",
         batch_size: int,
-        load_fn: callable = None,
+        load_fn: Callable = None,
         adapter: Adapter | None,
         stage: str = "training",
+        augmentations: Mapping[str, Callable] | Callable = None,
         **kwargs,
     ):
+        """
+        Initialize a DiskDataset instance for offline training using a set of simulations that
+        do not fit in memory and are therefore loaded from disk one batch at a time.
+
+        Parameters
+        ----------
+        root : os.PathLike
+            Root directory containing the sample files.
+        pattern : str, default="*.pkl"
+            Glob pattern to match sample files.
+        batch_size : int
+            Number of samples per batch.
+        load_fn : Callable, optional
+            Function to load a single file into a sample. Defaults to `pickle_load`.
+        adapter : Adapter or None
+            Optional adapter to transform the loaded batch.
+        stage : str, default="training"
+            Current stage (e.g., "training", "validation", etc.) used by the adapter.
+        augmentations : dict of str to Callable or Callable, optional
+            Dictionary of augmentation functions to apply to each corresponding key in the batch
+            or a function to apply to the entire batch (possibly adding new keys).
+
+            If you provide a dictionary of functions, each function should accept one element
+            of your output batch and return the corresponding transformed element. Otherwise,
+            your function should accept the entire dictionary output and return a dictionary.
+
+            Note - augmentations are applied before the adapter is called and are generally
+            transforms that you only want to apply during training.
+        **kwargs
+            Additional keyword arguments passed to the base `PyDataset`.
+        """
         super().__init__(**kwargs)
         self.batch_size = batch_size
         self.root = pl.Path(root)
@@ -42,6 +78,8 @@ def __init__(
         self.files = list(map(str, self.root.glob(pattern)))
         self.stage = stage
 
+        self.augmentations = augmentations
+
         self.shuffle()
 
     def __getitem__(self, item) -> dict[str, np.ndarray]:
@@ -50,12 +88,20 @@ def __getitem__(self, item) -> dict[str, np.ndarray]:
 
         files = self.files[item * self.batch_size : (item + 1) * self.batch_size]
 
-        batch = []
-        for file in files:
-            batch.append(self.load_fn(file))
+        batch = [self.load_fn(file) for file in files]
 
         batch = tree_stack(batch)
 
+        if self.augmentations is None:
+            pass
+        elif isinstance(self.augmentations, Mapping):
+            for key, fn in self.augmentations.items():
+                batch[key] = fn(batch[key])
+        elif isinstance(self.augmentations, Callable):
+            batch = self.augmentations(batch)
+        else:
+            raise RuntimeError(f"Could not apply augmentations of type {type(self.augmentations)}.")
+
         if self.adapter is not None:
             batch = self.adapter(batch, stage=self.stage)
 
 
@@ -1,4 +1,4 @@
-from collections.abc import Mapping
+from collections.abc import Mapping, Callable
 
 import numpy as np
 
@@ -23,8 +23,37 @@ def __init__(
         num_samples: int = None,
         *,
         stage: str = "training",
+        augmentations: Mapping[str, Callable] | Callable = None,
         **kwargs,
     ):
+        """
+        Initialize an OfflineDataset instance for offline training with optional data augmentations.
+
+        Parameters
+        ----------
+        data : Mapping[str, np.ndarray]
+            Pre-simulated data stored in a dictionary, where each key maps to a NumPy array.
+        batch_size : int
+            Number of samples per batch.
+        adapter : Adapter or None
+            Optional adapter to transform the batch.
+        num_samples : int, optional
+            Number of samples in the dataset. If None, it will be inferred from the data.
+        stage : str, default="training"
+            Current stage (e.g., "training", "validation", etc.) used by the adapter.
+        augmentations : dict of str to Callable or Callable, optional
+            Dictionary of augmentation functions to apply to each corresponding key in the batch
+            or a function to apply to the entire batch (possibly adding new keys).
+
+            If you provide a dictionary of functions, each function should accept one element
+            of your output batch and return the corresponding transformed element. Otherwise,
+            your function should accept the entire dictionary output and return a dictionary.
+
+            Note - augmentations are applied before the adapter is called and are generally
+            transforms that you only want to apply during training.
+        **kwargs
+            Additional keyword arguments passed to the base `PyDataset`.
+        """
         super().__init__(**kwargs)
         self.batch_size = batch_size
         self.data = data
@@ -39,10 +68,29 @@ def __init__(
 
         self.indices = np.arange(self.num_samples, dtype="int64")
 
+        self.augmentations = augmentations
+
         self.shuffle()
 
     def __getitem__(self, item: int) -> dict[str, np.ndarray]:
-        """Get a batch of pre-simulated data"""
+        """
+        Get a batch of pre-simulated data from the in-memory dataset.
+
+        Parameters
+        ----------
+        item : int
+            Index of the batch to retrieve.
+
+        Returns
+        -------
+        dict of str to np.ndarray
+            A batch of loaded (and optionally augmented/adapted) data.
+
+        Raises
+        ------
+        IndexError
+            If the requested batch index is out of range.
+        """
         if not 0 <= item < self.num_batches:
             raise IndexError(f"Index {item} is out of bounds for dataset with {self.num_batches} batches.")
 
@@ -54,6 +102,16 @@ def __getitem__(self, item: int) -> dict[str, np.ndarray]:
             for key, value in self.data.items()
         }
 
+        if self.augmentations is None:
+            pass
+        elif isinstance(self.augmentations, Mapping):
+            for key, fn in self.augmentations.items():
+                batch[key] = fn(batch[key])
+        elif isinstance(self.augmentations, Callable):
+            batch = self.augmentations(batch)
+        else:
+            raise RuntimeError(f"Could not apply augmentations of type {type(self.augmentations)}.")
+
         if self.adapter is not None:
             batch = self.adapter(batch, stage=self.stage)
 
 
@@ -1,3 +1,5 @@
+from collections.abc import Mapping, Callable
+
 import keras
 import numpy as np
 
@@ -7,7 +9,7 @@
 
 class OnlineDataset(keras.utils.PyDataset):
     """
-    A dataset that is generated on-the-fly.
+    A dataset that generates simulations on-the-fly.
     """
 
     def __init__(
@@ -18,19 +20,72 @@ def __init__(
         adapter: Adapter | None,
         *,
         stage: str = "training",
+        augmentations: Mapping[str, Callable] | Callable = None,
         **kwargs,
     ):
+        """
+        Initialize an OnlineDataset instance for infinite stream training.
+
+        Parameters
+        ----------
+        simulator : Simulator
+            A simulator object with a `.sample(batch_shape)` method to generate data.
+        batch_size : int
+            Number of samples per batch.
+        num_batches : int
+            Total number of batches in the dataset.
+        adapter : Adapter or None
+            Optional adapter to transform the simulated batch.
+        stage : str, default="training"
+            Current stage (e.g., "training", "validation", etc.) used by the adapter.
+        augmentations : dict of str to Callable or Callable, optional
+            Dictionary of augmentation functions to apply to each corresponding key in the batch
+            or a function to apply to the entire batch (possibly adding new keys).
+
+            If you provide a dictionary of functions, each function should accept one element
+            of your output batch and return the corresponding transformed element. Otherwise,
+            your function should accept the entire dictionary output and return a dictionary.
+
+            Note - augmentations are applied before the adapter is called and are generally
+            transforms that you only want to apply during training.
+        **kwargs
+            Additional keyword arguments passed to the base `PyDataset`.
+        """
         super().__init__(**kwargs)
 
         self.batch_size = batch_size
         self._num_batches = num_batches
         self.adapter = adapter
         self.simulator = simulator
         self.stage = stage
+        self.augmentations = augmentations
 
     def __getitem__(self, item: int) -> dict[str, np.ndarray]:
+        """
+        Generate one batch of data.
+
+        Parameters
+        ----------
+        item : int
+            Index of the batch. Required by signature, but not used.
+
+        Returns
+        -------
+        dict of str to np.ndarray
+            A batch of simulated (and optionally augmented/adapted) data.
+        """
         batch = self.simulator.sample((self.batch_size,))
 
+        if self.augmentations is None:
+            pass
+        elif isinstance(self.augmentations, Mapping):
+            for key, fn in self.augmentations.items():
+                batch[key] = fn(batch[key])
+        elif isinstance(self.augmentations, Callable):
+            batch = self.augmentations(batch)
+        else:
+            raise RuntimeError(f"Could not apply augmentations of type {type(self.augmentations)}.")
+
         if self.adapter is not None:
             batch = self.adapter(batch, stage=self.stage)
 
 
@@ -4,8 +4,9 @@
 
 from .cif import CIF
 from .continuous_time_consistency_model import ContinuousTimeConsistencyModel
+from .diffusion_model import DiffusionModel
 from .free_form_flow import FreeFormFlow
 
 from ..utils._docs import _add_imports_to_all
 
-_add_imports_to_all(include_modules=[])
+_add_imports_to_all(include_modules=["diffusion_model"])
@@ -0,0 +1,9 @@
+from .diffusion_model import DiffusionModel
+from .noise_schedule import NoiseSchedule
+from .cosine_noise_schedule import CosineNoiseSchedule
+from .edm_noise_schedule import EDMNoiseSchedule
+from .dispatch import find_noise_schedule
+
+from ...utils._docs import _add_imports_to_all
+
+_add_imports_to_all(include_modules=[])