1- From b837d3d46e593c946f5de70bdff178fa2bff882b Mon Sep 17 00:00:00 2001
2- From: root <fenghao78@huawei .com>
3- Date: Mon, 15 Sep 2025 22:07:21 -0700
4- Subject: [PATCH] 0.9.1-patch
1+ From 76751cae43498d693a7a6dd2c8ec4b2d40672385 Mon Sep 17 00:00:00 2001
2+ From: zhou-haitao <1300182097@qq .com>
3+ Date: Tue, 21 Oct 2025 03:31:16 -0700
4+ Subject: [PATCH] Add commit
55
66---
77 .../kv_transfer/kv_connector/utils.py | 113 +++++++++++++++
88 .../kv_transfer/kv_connector/v1/base.py | 8 ++
99 .../v1/shared_storage_connector.py | 7 +-
1010 vllm/v1/core/block_pool.py | 2 +-
11- vllm/v1/core/sched/scheduler.py | 129 ++++++++++++++++++
11+ vllm/v1/core/sched/scheduler.py | 132 ++++++++++++++++++
1212 vllm/v1/core/single_type_kv_cache_manager.py | 2 +
1313 vllm/v1/executor/multiproc_executor.py | 37 ++++-
1414 vllm/v1/outputs.py | 5 +
1515 vllm/v1/request.py | 1 +
1616 vllm/v1/worker/gpu_input_batch.py | 9 ++
1717 vllm/v1/worker/gpu_model_runner.py | 52 ++++++-
18- vllm/v1/worker/gpu_worker.py | 23 +++ -
19- 12 files changed, 366 insertions(+), 22 deletions(-)
18+ vllm/v1/worker/gpu_worker.py | 23 ++-
19+ 12 files changed, 369 insertions(+), 22 deletions(-)
2020
2121diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
2222index b9bed06d7..de062cfb3 100644
@@ -211,7 +211,7 @@ index d21f94727..1800665c7 100644
211211 new_full_blocks = blocks[num_cached_blocks:num_full_blocks]
212212 assert len(block_hashes) >= num_cached_blocks
213213diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
214- index 3d7bbe7e0..1ef81e960 100644
214+ index 3d7bbe7e0..b6d4a340a 100644
215215--- a/vllm/v1/core/sched/scheduler.py
216216+++ b/vllm/v1/core/sched/scheduler.py
217217@@ -707,16 +707,28 @@ class Scheduler(SchedulerInterface):
@@ -243,16 +243,19 @@ index 3d7bbe7e0..1ef81e960 100644
243243 num_tokens_scheduled = num_scheduled_tokens.get(req_id, 0)
244244 if num_tokens_scheduled == 0:
245245 # The request was not scheduled in this step.
246- @@ -761,6 +773,8 @@ class Scheduler(SchedulerInterface):
246+ @@ -761,6 +773,11 @@ class Scheduler(SchedulerInterface):
247247 new_logprobs = None
248248 new_token_ids = generated_token_ids
249249 kv_transfer_params = None
250250+ if model_runner_output.finished_dumping is not None:
251251+ request.succeed_dumped_blocks.extend(model_runner_output.finished_dumping.get(req_id, []))
252+ + is_prefill = request.num_output_tokens == 0
253+ + if is_prefill:
254+ + self.connector.connector.commit(model_runner_output.finished_dumping.get(req_id, []), True)
252255
253256 # Append generated tokens and check for stop. Note that if
254257 # a request is still being prefilled, we expect the model runner
255- @@ -824,6 +838 ,8 @@ class Scheduler(SchedulerInterface):
258+ @@ -824,6 +841 ,8 @@ class Scheduler(SchedulerInterface):
256259
257260 if not stopped:
258261 new_running.append(request)
@@ -261,7 +264,7 @@ index 3d7bbe7e0..1ef81e960 100644
261264
262265 # KV Connector: update state for finished KV Transfers.
263266 self._update_from_kv_xfer_finished(model_runner_output)
264- @@ -1042,3 +1058 ,116 @@ class Scheduler(SchedulerInterface):
267+ @@ -1042,3 +1061 ,116 @@ class Scheduler(SchedulerInterface):
265268 for req_id in (model_runner_output.finished_sending or ()):
266269 logger.debug("Finished sending KV transfer for request %s", req_id)
267270 self._free_blocks(self.requests[req_id])
@@ -707,4 +710,5 @@ index b7d244f27..263a916d2 100644
707710 def profile(self, is_start: bool = True):
708711 if self.profiler is None:
709712- -
710- 2.34.1
713+ 2.34.1
714+
0 commit comments