rllm-org
diff --git a/examples/solver_judge_tinker/train_solver_judge_flow_tinker.sh b/examples/solver_judge_tinker/train_solver_judge_flow_tinker.sh
Lines changed: 1 addition & 1 deletion
diff --git a/rllm/engine/agent_workflow_engine.py b/rllm/engine/agent_workflow_engine.py
Lines changed: 1 addition & 1 deletion
diff --git a/rllm/trainer/agent_trainer.py b/rllm/trainer/agent_trainer.py
Lines changed: 28 additions & 13 deletions
@@ -2,7 +2,7 @@ set -x
 
 MODEL_PATH=Qwen/Qwen3-4B-Instruct-2507
 
-python3 -m examples.solver_judge_tinker.train_solver_judge_flow_tinker \
+python -m examples.solver_judge_tinker.train_solver_judge_flow_tinker \
     model.name=$MODEL_PATH \
     model.lora_rank=32 \
     training.group_size=4 \
 
@@ -11,12 +11,12 @@
 
 from rllm.agents.agent import Episode
 from rllm.engine.rollout import ModelOutput, RolloutEngine
-from rllm.engine.rollout.verl_engine import VerlEngine
 from rllm.misc import colorful_print
 from rllm.workflows.workflow import TerminationReason, Workflow
 
 # Avoid hard dependency on verl at import time; only for typing
 if TYPE_CHECKING:
+    from rllm.engine.rollout.verl_engine import VerlEngine
     from verl import DataProto
 
 logger = logging.getLogger(__name__)
 
@@ -3,8 +3,6 @@
 import ray
 
 from rllm.data import Dataset
-from rllm.trainer.verl.ray_runtime_env import get_ppo_ray_runtime_env
-from rllm.trainer.verl.train_agent_ppo import TaskRunner
 
 
 class AgentTrainer:
@@ -42,13 +40,21 @@ def __init__(
         """
         if workflow_class is not None:
             if agent_class is not None:
-                raise ValueError("agent_class is not supported when using workflow, instead use workflow_args['agent_cls']")
+                raise ValueError(
+                    "agent_class is not supported when using workflow, instead use workflow_args['agent_cls']"
+                )
             if agent_args is not None:
-                raise ValueError("agent_args is not supported when using workflow, instead use workflow_args['agent_args']")
+                raise ValueError(
+                    "agent_args is not supported when using workflow, instead use workflow_args['agent_args']"
+                )
             if env_class is not None:
-                raise ValueError("env_class is not supported when using workflow, instead use workflow_args['env_cls']")
+                raise ValueError(
+                    "env_class is not supported when using workflow, instead use workflow_args['env_cls']"
+                )
             if env_args is not None:
-                raise ValueError("env_args is not supported when using workflow, instead use workflow_args['env_args']")
+                raise ValueError(
+                    "env_args is not supported when using workflow, instead use workflow_args['env_args']"
+                )
 
         self.workflow_class = workflow_class
         self.workflow_args = workflow_args or {}
@@ -63,11 +69,15 @@ def __init__(
         self.val_dataset = val_dataset
         self.backend = backend
 
-        assert self.backend in ["verl", "tinker"], f"Unsupported backend: {self.backend}, must be one of ['verl', 'tinker']"
+        assert self.backend in [
+            "verl", "tinker"
+        ], f"Unsupported backend: {self.backend}, must be one of ['verl', 'tinker']"
 
-        if train_dataset is not None and self.config is not None and hasattr(self.config, "data"):
+        if train_dataset is not None and self.config is not None and hasattr(
+                self.config, "data"):
             self.config.data.train_files = train_dataset.get_verl_data_path()
-        if val_dataset is not None and self.config is not None and hasattr(self.config, "data"):
+        if val_dataset is not None and self.config is not None and hasattr(
+                self.config, "data"):
             self.config.data.val_files = val_dataset.get_verl_data_path()
 
     def train(self):
@@ -101,14 +111,20 @@ def _train_tinker(self):
         trainer.fit_agent()
 
     def _train_verl(self):
+        from rllm.trainer.verl.ray_runtime_env import get_ppo_ray_runtime_env
+        from rllm.trainer.verl.train_agent_ppo import TaskRunner
         # Check if Ray is not initialized
         if not ray.is_initialized():
             # read off all the `ray_init` settings from the config
             if self.config is not None and hasattr(self.config, "ray_init"):
-                ray_init_settings = {k: v for k, v in self.config.ray_init.items() if v is not None}
+                ray_init_settings = {
+                    k: v
+                    for k, v in self.config.ray_init.items() if v is not None
+                }
             else:
                 ray_init_settings = {}
-            ray.init(runtime_env=get_ppo_ray_runtime_env(), **ray_init_settings)
+            ray.init(runtime_env=get_ppo_ray_runtime_env(),
+                     **ray_init_settings)
 
         runner = TaskRunner.remote()
 
@@ -121,5 +137,4 @@ def _train_verl(self):
                 env_class=self.env_class,
                 agent_args=self.agent_args,
                 env_args=self.env_args,
-            )
-        )
+            ))