Skip to content

Commit 976d74c

Browse files
committed
Merge remote-tracking branch 'upstream/nightly' into nightly
2 parents fcb89a4 + c636400 commit 976d74c

File tree

7 files changed

+618
-50
lines changed

7 files changed

+618
-50
lines changed

examples/solver_judge_tinker/train_solver_judge_flow_tinker.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ set -x
22

33
MODEL_PATH=Qwen/Qwen3-4B-Instruct-2507
44

5-
python3 -m examples.solver_judge_tinker.train_solver_judge_flow_tinker \
5+
python -m examples.solver_judge_tinker.train_solver_judge_flow_tinker \
66
model.name=$MODEL_PATH \
77
model.lora_rank=32 \
88
training.group_size=4 \

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,11 @@ strands = [
9999
"strands-agents",
100100
]
101101

102+
tinker = [
103+
"tinker",
104+
"tinker-cookbook @ git+https://github.com/thinking-machines-lab/tinker-cookbook.git#egg=tinker-cookbook",
105+
]
106+
102107
[tool.ruff]
103108
line-length = 5000 # TODO: Reduce this to a more reasonable value
104109

rllm/engine/agent_workflow_engine.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,12 @@
1111

1212
from rllm.agents.agent import Episode
1313
from rllm.engine.rollout import ModelOutput, RolloutEngine
14-
from rllm.engine.rollout.verl_engine import VerlEngine
1514
from rllm.misc import colorful_print
1615
from rllm.workflows.workflow import TerminationReason, Workflow
1716

1817
# Avoid hard dependency on verl at import time; only for typing
1918
if TYPE_CHECKING:
19+
from rllm.engine.rollout.verl_engine import VerlEngine
2020
from verl import DataProto
2121

2222
logger = logging.getLogger(__name__)

rllm/trainer/agent_trainer.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import ray
44

55
from rllm.data import Dataset
6-
from rllm.trainer.verl.ray_runtime_env import get_ppo_ray_runtime_env
7-
from rllm.trainer.verl.train_agent_ppo import TaskRunner
86

97

108
class AgentTrainer:
@@ -101,6 +99,9 @@ def _train_tinker(self):
10199
trainer.fit_agent()
102100

103101
def _train_verl(self):
102+
from rllm.trainer.verl.ray_runtime_env import get_ppo_ray_runtime_env
103+
from rllm.trainer.verl.train_agent_ppo import TaskRunner
104+
104105
# Check if Ray is not initialized
105106
if not ray.is_initialized():
106107
# read off all the `ray_init` settings from the config

rllm/trainer/tinker/tinker_agent_trainer.py

Lines changed: 112 additions & 46 deletions
Large diffs are not rendered by default.

rllm/utils/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)