5 files changed: +9 -17 lines changed
Original file line number Diff line number Diff line change
@@ -125,7 +125,7 @@ async def reset_mm_cache(self) -> None:
125125 ...
126126
127127 @abstractmethod
128- async def reset_prefix_cache (self , device : Device | None = None ) -> None :
128+ async def reset_prefix_cache (self ) -> None :
129129 """Reset the prefix cache"""
130130 ...
131131
Original file line number Diff line number Diff line change
3232 TokenizerMode ,
3333)
3434from vllm .engine .arg_utils import EngineArgs
35- from vllm .engine .protocol import Device
3635from vllm .entrypoints .chat_utils import (
3736 ChatCompletionMessageParam ,
3837 ChatTemplateContentFormatOption ,
@@ -1499,8 +1498,8 @@ def start_profile(self) -> None:
14991498 def stop_profile (self ) -> None :
15001499 self .llm_engine .stop_profile ()
15011500
1502- def reset_prefix_cache (self , device : Device | None = None ) -> None :
1503- self .llm_engine .reset_prefix_cache (device )
1501+ def reset_prefix_cache (self ) -> None :
1502+ self .llm_engine .reset_prefix_cache ()
15041503
15051504 def sleep (self , level : int = 1 ):
15061505 """
Original file line number Diff line number Diff line change
3939import vllm .envs as envs
4040from vllm .config import VllmConfig
4141from vllm .engine .arg_utils import AsyncEngineArgs
42- from vllm .engine .protocol import Device , EngineClient
42+ from vllm .engine .protocol import EngineClient
4343from vllm .entrypoints .anthropic .protocol import (
4444 AnthropicError ,
4545 AnthropicErrorResponse ,
@@ -1069,12 +1069,8 @@ async def reset_prefix_cache(raw_request: Request):
10691069 Reset the prefix cache. Note that we currently do not check if the
10701070 prefix cache is successfully reset in the API server.
10711071 """
1072- device = None
1073- device_str = raw_request .query_params .get ("device" )
1074- if device_str is not None :
1075- device = Device [device_str .upper ()]
1076- logger .info ("Resetting prefix cache with specific %s..." , str (device ))
1077- await engine_client (raw_request ).reset_prefix_cache (device )
1072+ logger .info ("Resetting prefix cache..." )
1073+ await engine_client (raw_request ).reset_prefix_cache ()
10781074 return Response (status_code = 200 )
10791075
10801076 @router .post ("/reset_mm_cache" )
Original file line number Diff line number Diff line change
1414import vllm .envs as envs
1515from vllm .config import VllmConfig
1616from vllm .engine .arg_utils import AsyncEngineArgs
17- from vllm .engine .protocol import Device , EngineClient
17+ from vllm .engine .protocol import EngineClient
1818from vllm .entrypoints .utils import _validate_truncation_size
1919from vllm .inputs import PromptType
2020from vllm .logger import init_logger
@@ -672,9 +672,7 @@ async def reset_mm_cache(self) -> None:
672672 self .processor .clear_mm_cache ()
673673 await self .engine_core .reset_mm_cache_async ()
674674
675- async def reset_prefix_cache (self , device : Device | None = None ) -> None :
676- if device == Device .CPU :
677- raise ValueError ("Not supported on CPU." )
675+ async def reset_prefix_cache (self ) -> None :
678676 await self .engine_core .reset_prefix_cache_async ()
679677
680678 async def sleep (self , level : int = 1 ) -> None :
Original file line number Diff line number Diff line change
1414from vllm .distributed import stateless_destroy_torch_distributed_process_group
1515from vllm .distributed .parallel_state import get_dp_group
1616from vllm .engine .arg_utils import EngineArgs
17- from vllm .engine .protocol import Device
1817from vllm .inputs import PromptType
1918from vllm .logger import init_logger
2019from vllm .lora .request import LoRARequest
@@ -321,7 +320,7 @@ def reset_mm_cache(self):
321320 self .processor .clear_mm_cache ()
322321 self .engine_core .reset_mm_cache ()
323322
324- def reset_prefix_cache (self , device : Device | None = None ):
323+ def reset_prefix_cache (self ):
325324 self .engine_core .reset_prefix_cache ()
326325
327326 def sleep (self , level : int = 1 ):
You can’t perform that action at this time.
0 commit comments