
Commit cb0a0f5

[Feature]v091_patch add commit (#302)
v091_patch add commit
1 parent d2f3d9a · commit cb0a0f5

File tree

1 file changed: +16 -12 lines


ucm/integration/vllm/patch/0.9.1/vllm-adapt.patch

Lines changed: 16 additions & 12 deletions
@@ -1,22 +1,22 @@
-From b837d3d46e593c946f5de70bdff178fa2bff882b Mon Sep 17 00:00:00 2001
-From: root <fenghao78@huawei.com>
-Date: Mon, 15 Sep 2025 22:07:21 -0700
-Subject: [PATCH] 0.9.1-patch
+From 76751cae43498d693a7a6dd2c8ec4b2d40672385 Mon Sep 17 00:00:00 2001
+From: zhou-haitao <1300182097@qq.com>
+Date: Tue, 21 Oct 2025 03:31:16 -0700
+Subject: [PATCH] Add commit
 
 ---
  .../kv_transfer/kv_connector/utils.py        | 113 +++++++++++++++
  .../kv_transfer/kv_connector/v1/base.py      |   8 ++
  .../v1/shared_storage_connector.py           |   7 +-
  vllm/v1/core/block_pool.py                   |   2 +-
- vllm/v1/core/sched/scheduler.py              | 129 ++++++++++++++++++
+ vllm/v1/core/sched/scheduler.py              | 132 ++++++++++++++++++
  vllm/v1/core/single_type_kv_cache_manager.py |   2 +
  vllm/v1/executor/multiproc_executor.py       |  37 ++++-
  vllm/v1/outputs.py                           |   5 +
  vllm/v1/request.py                           |   1 +
  vllm/v1/worker/gpu_input_batch.py            |   9 ++
  vllm/v1/worker/gpu_model_runner.py           |  52 ++++++-
- vllm/v1/worker/gpu_worker.py                 |  23 +++-
- 12 files changed, 366 insertions(+), 22 deletions(-)
+ vllm/v1/worker/gpu_worker.py                 |  23 ++-
+ 12 files changed, 369 insertions(+), 22 deletions(-)
 
 diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
 index b9bed06d7..de062cfb3 100644
@@ -211,7 +211,7 @@ index d21f94727..1800665c7 100644
          new_full_blocks = blocks[num_cached_blocks:num_full_blocks]
          assert len(block_hashes) >= num_cached_blocks
 diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
-index 3d7bbe7e0..1ef81e960 100644
+index 3d7bbe7e0..b6d4a340a 100644
 --- a/vllm/v1/core/sched/scheduler.py
 +++ b/vllm/v1/core/sched/scheduler.py
 @@ -707,16 +707,28 @@ class Scheduler(SchedulerInterface):
@@ -243,16 +243,19 @@ index 3d7bbe7e0..1ef81e960 100644
              num_tokens_scheduled = num_scheduled_tokens.get(req_id, 0)
              if num_tokens_scheduled == 0:
                  # The request was not scheduled in this step.
-@@ -761,6 +773,8 @@ class Scheduler(SchedulerInterface):
+@@ -761,6 +773,11 @@ class Scheduler(SchedulerInterface):
              new_logprobs = None
              new_token_ids = generated_token_ids
              kv_transfer_params = None
 +            if model_runner_output.finished_dumping is not None:
 +                request.succeed_dumped_blocks.extend(model_runner_output.finished_dumping.get(req_id, []))
++                is_prefill = request.num_output_tokens == 0
++                if is_prefill:
++                    self.connector.connector.commit(model_runner_output.finished_dumping.get(req_id, []), True)
 
              # Append generated tokens and check for stop. Note that if
              # a request is still being prefilled, we expect the model runner
-@@ -824,6 +838,8 @@ class Scheduler(SchedulerInterface):
+@@ -824,6 +841,8 @@ class Scheduler(SchedulerInterface):
 
              if not stopped:
                  new_running.append(request)
@@ -261,7 +264,7 @@ index 3d7bbe7e0..1ef81e960 100644
 
          # KV Connector: update state for finished KV Transfers.
          self._update_from_kv_xfer_finished(model_runner_output)
-@@ -1042,3 +1058,116 @@ class Scheduler(SchedulerInterface):
+@@ -1042,3 +1061,116 @@ class Scheduler(SchedulerInterface):
          for req_id in (model_runner_output.finished_sending or ()):
              logger.debug("Finished sending KV transfer for request %s", req_id)
              self._free_blocks(self.requests[req_id])
@@ -707,4 +710,5 @@ index b7d244f27..263a916d2 100644
      def profile(self, is_start: bool = True):
          if self.profiler is None:
  --
-2.34.1
+2.34.1
+
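The functional core of this patch revision is the new scheduler-side commit path: when the model runner reports blocks whose KV-cache dump has completed (finished_dumping), the scheduler records them on the request (succeed_dumped_blocks) and, if the request is still in prefill (no output tokens generated yet), commits those blocks through the connector with a success flag of True. Below is a minimal, self-contained Python sketch of that logic. The class shapes and DummyConnector are illustrative assumptions, not the real vLLM/UCM types; only the names finished_dumping, succeed_dumped_blocks, and commit(block_ids, True) are taken from the patch, which invokes the method via self.connector.connector.commit.

from dataclasses import dataclass, field
from typing import Optional

# Hypothetical stand-ins for the objects the patch touches; shapes are
# assumptions for illustration only.
@dataclass
class ModelRunnerOutput:
    # req_id -> block ids whose KV dump finished in this step
    finished_dumping: Optional[dict[str, list[str]]] = None

@dataclass
class Request:
    req_id: str
    num_output_tokens: int = 0
    succeed_dumped_blocks: list[str] = field(default_factory=list)

class DummyConnector:
    """Minimal stand-in for the connector commit API the patch calls."""
    def commit(self, block_ids: list[str], success: bool) -> None:
        print(f"commit({block_ids}, success={success})")

def update_dumped_blocks(request: Request,
                         model_runner_output: ModelRunnerOutput,
                         connector: DummyConnector) -> None:
    """Mirror of the scheduler-side logic added by the patch."""
    if model_runner_output.finished_dumping is None:
        return
    dumped = model_runner_output.finished_dumping.get(request.req_id, [])
    # Record the successfully dumped blocks on the request.
    request.succeed_dumped_blocks.extend(dumped)
    # A request with no output tokens yet is still in prefill; commit the
    # freshly dumped blocks right away instead of waiting for completion.
    is_prefill = request.num_output_tokens == 0
    if is_prefill:
        connector.commit(dumped, True)

if __name__ == "__main__":
    req = Request(req_id="req-0")
    out = ModelRunnerOutput(finished_dumping={"req-0": ["blk-a", "blk-b"]})
    update_dumped_blocks(req, out, DummyConnector())
    print(req.succeed_dumped_blocks)

Presumably, committing at prefill time makes the dumped blocks visible for reuse as early as possible; note that the patch passes True unconditionally as the success flag for these blocks.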