Skip to content

Commit 18a2c72

Browse files
committed
match wandb run
1 parent 4dd07f5 commit 18a2c72

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

examples/solver_judge/train_solver_judge_flow.sh

100644 → 100755
Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ python3 -m examples.solver_judge.train_solver_judge_flow \
1010
data.train_batch_size=64 \
1111
data.max_prompt_length=2048 \
1212
data.max_response_length=1024 \
13-
actor_rollout_ref.model.path=Qwen/Qwen3-0.6B \
13+
actor_rollout_ref.model.path=Qwen/Qwen3-4B-Instruct-2507 \
1414
actor_rollout_ref.actor.optim.lr=1e-6 \
1515
actor_rollout_ref.model.use_remove_padding=True \
1616
actor_rollout_ref.actor.loss_agg_mode=seq-mean-token-mean \
@@ -31,12 +31,13 @@ python3 -m examples.solver_judge.train_solver_judge_flow \
3131
actor_rollout_ref.rollout.name=vllm \
3232
actor_rollout_ref.rollout.mode="async" \
3333
actor_rollout_ref.rollout.enforce_eager=False \
34-
actor_rollout_ref.rollout.temperature=0.6 \
34+
actor_rollout_ref.rollout.temperature=1.0 \
35+
actor_rollout_ref.rollout.top_p=1.0 \
3536
actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
3637
actor_rollout_ref.rollout.n=4 \
3738
actor_rollout_ref.rollout.val_kwargs.n=1 \
38-
actor_rollout_ref.rollout.val_kwargs.temperature=0.6 \
39-
actor_rollout_ref.rollout.val_kwargs.top_p=0.95 \
39+
actor_rollout_ref.rollout.val_kwargs.temperature=1.0 \
40+
actor_rollout_ref.rollout.val_kwargs.top_p=1.0 \
4041
actor_rollout_ref.ref.fsdp_config.param_offload=True \
4142
algorithm.adv_estimator=grpo \
4243
rllm.compact_filtering.enable=False \
@@ -59,6 +60,7 @@ python3 -m examples.solver_judge.train_solver_judge_flow \
5960
trainer.test_freq=10 \
6061
trainer.default_hdfs_dir=null \
6162
trainer.total_epochs=100 \
62-
rllm.workflow.use_workflow=True
63+
rllm.workflow.use_workflow=True \
64+
+ray_init._temp_dir=/home/tianhao/tmp
6365

6466
pkill -9 -f 'ray::WorkerDict'

0 commit comments

Comments
 (0)