Skip to content

Commit e9abfe7

Browse files
zhuohan123 and geodavic
authored and committed
[RL] [V1] Remove unused device argument from reset_kv_cache (vllm-project#28766)
Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
Signed-off-by: George D. Torres <gdavtor@gmail.com>
1 parent 5ed2c71 commit e9abfe7

File tree

5 files changed

+9
-17
lines changed

5 files changed

+9
-17
lines changed

vllm/engine/protocol.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ async def reset_mm_cache(self) -> None:
125125
...
126126

127127
@abstractmethod
128-
async def reset_prefix_cache(self, device: Device | None = None) -> None:
128+
async def reset_prefix_cache(self) -> None:
129129
"""Reset the prefix cache"""
130130
...
131131

vllm/entrypoints/llm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
TokenizerMode,
3333
)
3434
from vllm.engine.arg_utils import EngineArgs
35-
from vllm.engine.protocol import Device
3635
from vllm.entrypoints.chat_utils import (
3736
ChatCompletionMessageParam,
3837
ChatTemplateContentFormatOption,
@@ -1499,8 +1498,8 @@ def start_profile(self) -> None:
14991498
def stop_profile(self) -> None:
15001499
self.llm_engine.stop_profile()
15011500

1502-
def reset_prefix_cache(self, device: Device | None = None) -> None:
1503-
self.llm_engine.reset_prefix_cache(device)
1501+
def reset_prefix_cache(self) -> None:
1502+
self.llm_engine.reset_prefix_cache()
15041503

15051504
def sleep(self, level: int = 1):
15061505
"""

vllm/entrypoints/openai/api_server.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
import vllm.envs as envs
4040
from vllm.config import VllmConfig
4141
from vllm.engine.arg_utils import AsyncEngineArgs
42-
from vllm.engine.protocol import Device, EngineClient
42+
from vllm.engine.protocol import EngineClient
4343
from vllm.entrypoints.anthropic.protocol import (
4444
AnthropicError,
4545
AnthropicErrorResponse,
@@ -1069,12 +1069,8 @@ async def reset_prefix_cache(raw_request: Request):
10691069
Reset the prefix cache. Note that we currently do not check if the
10701070
prefix cache is successfully reset in the API server.
10711071
"""
1072-
device = None
1073-
device_str = raw_request.query_params.get("device")
1074-
if device_str is not None:
1075-
device = Device[device_str.upper()]
1076-
logger.info("Resetting prefix cache with specific %s...", str(device))
1077-
await engine_client(raw_request).reset_prefix_cache(device)
1072+
logger.info("Resetting prefix cache...")
1073+
await engine_client(raw_request).reset_prefix_cache()
10781074
return Response(status_code=200)
10791075

10801076
@router.post("/reset_mm_cache")

vllm/v1/engine/async_llm.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import vllm.envs as envs
1515
from vllm.config import VllmConfig
1616
from vllm.engine.arg_utils import AsyncEngineArgs
17-
from vllm.engine.protocol import Device, EngineClient
17+
from vllm.engine.protocol import EngineClient
1818
from vllm.entrypoints.utils import _validate_truncation_size
1919
from vllm.inputs import PromptType
2020
from vllm.logger import init_logger
@@ -672,9 +672,7 @@ async def reset_mm_cache(self) -> None:
672672
self.processor.clear_mm_cache()
673673
await self.engine_core.reset_mm_cache_async()
674674

675-
async def reset_prefix_cache(self, device: Device | None = None) -> None:
676-
if device == Device.CPU:
677-
raise ValueError("Not supported on CPU.")
675+
async def reset_prefix_cache(self) -> None:
678676
await self.engine_core.reset_prefix_cache_async()
679677

680678
async def sleep(self, level: int = 1) -> None:

vllm/v1/engine/llm_engine.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from vllm.distributed import stateless_destroy_torch_distributed_process_group
1515
from vllm.distributed.parallel_state import get_dp_group
1616
from vllm.engine.arg_utils import EngineArgs
17-
from vllm.engine.protocol import Device
1817
from vllm.inputs import PromptType
1918
from vllm.logger import init_logger
2019
from vllm.lora.request import LoRARequest
@@ -321,7 +320,7 @@ def reset_mm_cache(self):
321320
self.processor.clear_mm_cache()
322321
self.engine_core.reset_mm_cache()
323322

324-
def reset_prefix_cache(self, device: Device | None = None):
323+
def reset_prefix_cache(self):
325324
self.engine_core.reset_prefix_cache()
326325

327326
def sleep(self, level: int = 1):

0 commit comments

Comments (0)