diff --git a/examples/solver_judge/solver_judge_flow_colab.ipynb b/examples/solver_judge/solver_judge_flow_colab.ipynb
index 3f3643bd1..76e44220c 100644
--- a/examples/solver_judge/solver_judge_flow_colab.ipynb
+++ b/examples/solver_judge/solver_judge_flow_colab.ipynb
@@ -87,7 +87,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install \"transformers[hf_xet]>=4.51.0\" accelerate datasets peft hf-transfer \\\n",
+    "!pip install \"transformers[hf_xet]>=4.57.0\" accelerate datasets peft hf-transfer \\\n",
     "    \"numpy<2.0.0\" \"pyarrow>=15.0.0\" pandas \\\n",
     "    ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \\\n",
     "    pytest py-spy pyext pre-commit ruff tensorboard\n",
diff --git a/rllm/trainer/verl/agent_workflow_trainer.py b/rllm/trainer/verl/agent_workflow_trainer.py
index bcd6eecd5..859168ccf 100644
--- a/rllm/trainer/verl/agent_workflow_trainer.py
+++ b/rllm/trainer/verl/agent_workflow_trainer.py
@@ -38,15 +38,16 @@ def __init__(
         self,
         config,
         tokenizer,
-        role_worker_mapping: dict[Role, WorkerType],
-        resource_pool_manager: ResourcePoolManager,
+        processor=None,
+        role_worker_mapping: dict[Role, WorkerType] = None,
+        resource_pool_manager: ResourcePoolManager = None,
         ray_worker_group_cls: RayWorkerGroup = RayWorkerGroup,
         reward_fn=None,
         val_reward_fn=None,
         workflow_class=None,
         workflow_args=None,
     ):
-        super().__init__(config=config, tokenizer=tokenizer, role_worker_mapping=role_worker_mapping, resource_pool_manager=resource_pool_manager, ray_worker_group_cls=ray_worker_group_cls, reward_fn=reward_fn, val_reward_fn=val_reward_fn)
+        super().__init__(config=config, tokenizer=tokenizer, processor=processor, role_worker_mapping=role_worker_mapping, resource_pool_manager=resource_pool_manager, ray_worker_group_cls=ray_worker_group_cls, reward_fn=reward_fn, val_reward_fn=val_reward_fn)
         self.workflow_class = workflow_class
         self.workflow_args = workflow_args or {}
 
diff --git a/rllm/trainer/verl/agent_workflow_trainer_fireworks.py b/rllm/trainer/verl/agent_workflow_trainer_fireworks.py
index 239348e7e..e296f0276 100644
--- a/rllm/trainer/verl/agent_workflow_trainer_fireworks.py
+++ b/rllm/trainer/verl/agent_workflow_trainer_fireworks.py
@@ -37,8 +37,9 @@ def __init__(
         self,
         config,
         tokenizer,
-        role_worker_mapping: dict[Role, WorkerType],
-        resource_pool_manager: ResourcePoolManager,
+        processor=None,
+        role_worker_mapping: dict[Role, WorkerType] = None,
+        resource_pool_manager: ResourcePoolManager = None,
         ray_worker_group_cls: RayWorkerGroup = RayWorkerGroup,
         reward_fn=None,
         val_reward_fn=None,
@@ -48,6 +49,7 @@
         super().__init__(
             config=config,
             tokenizer=tokenizer,
+            processor=processor,
             role_worker_mapping=role_worker_mapping,
             resource_pool_manager=resource_pool_manager,
             ray_worker_group_cls=ray_worker_group_cls,
diff --git a/rllm/trainer/verl/train_workflow_pipeline.py b/rllm/trainer/verl/train_workflow_pipeline.py
index 4c522a3f8..e1ba93a80 100644
--- a/rllm/trainer/verl/train_workflow_pipeline.py
+++ b/rllm/trainer/verl/train_workflow_pipeline.py
@@ -84,12 +84,12 @@ def run(self, config, workflow_class=None, workflow_args=None):
         local_path = copy_to_local(config.actor_rollout_ref.model.path, use_shm=config.actor_rollout_ref.model.get("use_shm", False))
 
         # Instantiate the tokenizer and processor.
-        from verl.utils import hf_tokenizer
+        from verl.utils import hf_processor, hf_tokenizer
 
         trust_remote_code = config.data.get("trust_remote_code", False)
         tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code)
         # Used for multimodal LLM, could be None
-        # processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True)
+        processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True)
 
         # Define worker classes based on the actor strategy.
         if config.actor_rollout_ref.actor.strategy in {"fsdp", "fsdp2"}:
@@ -175,6 +175,7 @@ def run(self, config, workflow_class=None, workflow_args=None):
         trainer = FireworksAgentWorkflowPPOTrainer(
             config=config,
             tokenizer=tokenizer,
+            processor=processor,
             role_worker_mapping=role_worker_mapping,
             resource_pool_manager=resource_pool_manager,
             ray_worker_group_cls=ray_worker_group_cls,
diff --git a/scripts/install_verl.sh b/scripts/install_verl.sh
index 2dfcda415..d3239ee16 100644
--- a/scripts/install_verl.sh
+++ b/scripts/install_verl.sh
@@ -7,7 +7,7 @@ pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21
 
 echo "2. install basic packages"
-pip install "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
+pip install "transformers[hf_xet]>=4.57.0" accelerate datasets peft hf-transfer \
     "numpy<2.0.0" "pyarrow>=19.0.1" pandas \
     "ray[default]" codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler blobfile xgrammar \
     pytest py-spy pyext pre-commit ruff
 
diff --git a/verl b/verl
index 8fdc4d3f2..7df2afb93 160000
--- a/verl
+++ b/verl
@@ -1 +1 @@
-Subproject commit 8fdc4d3f202f41461f4de9f42a637228e342668b
+Subproject commit 7df2afb936cd37b7b3a262edc119b2a57f070e3b
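
Taken together, these changes make the workflow trainers multimodal-aware: train_workflow_pipeline.py now builds a HuggingFace processor next to the tokenizer, and both AgentWorkflowPPOTrainer and FireworksAgentWorkflowPPOTrainer accept a processor keyword and forward it to the base PPO trainer. Below is a minimal sketch of how the tokenizer/processor pair is now built, assuming the verl.utils helpers shown in the diff; the checkpoint path is a placeholder for illustration, not part of the change:

    from verl.utils import hf_processor, hf_tokenizer

    # Placeholder path; the pipeline resolves this from
    # config.actor_rollout_ref.model.path via copy_to_local().
    local_path = "Qwen/Qwen2.5-VL-7B-Instruct"
    trust_remote_code = False

    tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code)
    # For text-only checkpoints hf_processor may return None, which is why
    # both trainers default to processor=None and pass it through as-is.
    processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True)

Because processor defaults to None, existing call sites that never pass it keep their old behavior; only multimodal runs pick up the processor built in the pipeline and handed to FireworksAgentWorkflowPPOTrainer.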