Update

vmoens · vmoens · commit 5364dfff4026 · 2025-10-24T17:54:52.000-07:00
[ghstack-poisoned]
diff --git a/test/test_configs.py b/test/test_configs.py
@@ -835,7 +835,10 @@ def test_tensor_dict_module_config(self):
             in_keys=["observation"],
             out_keys=["action"],
         )
-        assert cfg._target_ == "tensordict.nn.TensorDictModule"
+        assert (
+            cfg._target_
+            == "torchrl.trainers.algorithms.configs.modules._make_tensordict_module"
+        )
         assert cfg.module._target_ == "torchrl.modules.MLP"
         assert cfg.in_keys == ["observation"]
         assert cfg.out_keys == ["action"]
diff --git a/torchrl/trainers/algorithms/configs/modules.py b/torchrl/trainers/algorithms/configs/modules.py
@@ -222,7 +222,7 @@ class TensorDictModuleConfig(ModelConfig):
     """
 
     module: MLPConfig = MISSING
-    _target_: str = "tensordict.nn.TensorDictModule"
+    _target_: str = "torchrl.trainers.algorithms.configs.modules._make_tensordict_module"
     _partial_: bool = False
 
     def __post_init__(self) -> None:
@@ -292,6 +292,30 @@ def __post_init__(self) -> None:
         super().__post_init__()
 
 
+def _make_tensordict_module(*args, **kwargs):
+    """Build a TensorDictModule, resolving a config or partial ``module`` first."""
+    from hydra.utils import instantiate
+    from tensordict.nn import TensorDictModule
+
+    module = kwargs.pop("module")
+    shared = kwargs.pop("shared", False)
+
+    # Resolve `module` first when it still carries a Hydra-style `_target_`
+    if hasattr(module, "_target_"):
+        module = instantiate(module)
+    elif callable(module) and hasattr(module, "func"):  # partial-like: call it
+        module = module()
+
+    # Positional *args are not forwarded; only the remaining kwargs are
+    tensordict_module = TensorDictModule(module, **kwargs)
+
+    # `shared` was popped above, so it only toggles share_memory() here
+    if shared:
+        tensordict_module = tensordict_module.share_memory()
+
+    return tensordict_module
+
+
 def _make_tanh_normal_model(*args, **kwargs):
     """Helper function to create a TanhNormal model with ProbabilisticTensorDictSequential."""
     from hydra.utils import instantiate
@@ -351,10 +375,24 @@ def _make_tanh_normal_model(*args, **kwargs):
 
 def _make_value_model(*args, **kwargs):
     """Helper function to create a ValueOperator with the given network."""
+    from hydra.utils import instantiate
+
     from torchrl.modules import ValueOperator
 
     network = kwargs.pop("network")
     shared = kwargs.pop("shared", False)
+
+    # Resolve `network` first when it still carries a Hydra-style `_target_`
+    if hasattr(network, "_target_"):
+        network = instantiate(network)
+    elif callable(network) and hasattr(network, "func"):  # partial-like: call it
+        network = network()
+
+    # Positional *args are not forwarded; only the remaining kwargs are
+    value_operator = ValueOperator(network, **kwargs)
+
+    # Share the ValueOperator wrapper itself, not only the wrapped network
     if shared:
-        network = network.share_memory()
-    return ValueOperator(network, **kwargs)
+        value_operator = value_operator.share_memory()
+
+    return value_operator
diff --git a/torchrl/trainers/algorithms/ppo.py b/torchrl/trainers/algorithms/ppo.py
@@ -212,7 +212,6 @@ def __init__(
 
             if not self.async_collection:
                 # rb has been extended by the collector
-                raise RuntimeError
                 self.register_op("pre_epoch", rb_trainer.extend)
             self.register_op("process_optim_batch", rb_trainer.sample)
             self.register_op("post_loss", rb_trainer.update_priority)