55from typing import AsyncIterator
66
77import anyio
8+ import vllm
89from fastapi import APIRouter , Depends , status
910from fastapi import HTTPException , Request
1011from loguru import logger
3839)
3940
4041chat_router = APIRouter (prefix = "/chat" )
# NOTE(review): vllm_version is taken verbatim from vllm.__version__ (presumably
# a plain string). The `vllm_version >= "0.4.3"` checks further down would then
# compare character by character, so a release such as "0.10.0" would sort
# before "0.4.3" and take the legacy code path. Confirm, and consider parsing
# the version into a comparable form before comparing.
42+ vllm_version = vllm .__version__
4143
4244
4345def get_engine ():
@@ -105,17 +107,16 @@ async def create_chat_completion(
105107 try :
106108 from vllm .model_executor .guided_decoding import get_guided_decoding_logits_processor
107109
108- decoding_config = await engine .model .get_decoding_config ()
109-
110- try :
110+ if vllm_version >= "0.4.3" :
111+ decoding_config = await engine .model .get_decoding_config ()
111112 guided_decode_logits_processor = (
112113 await get_guided_decoding_logits_processor (
113114 request .guided_decoding_backend or decoding_config .guided_decoding_backend ,
114115 request ,
115116 engine .tokenizer ,
116117 )
117118 )
118- except TypeError :
119+ else :
119120 guided_decode_logits_processor = (
120121 await get_guided_decoding_logits_processor (
121122 request ,
@@ -128,7 +129,7 @@ async def create_chat_completion(
128129 except ImportError :
129130 pass
130131
131- try :
132+ if vllm_version >= "0.4.3" :
132133 result_generator = engine .model .generate (
133134 {
134135 "prompt" : prompt if isinstance (prompt , str ) else None ,
@@ -138,7 +139,7 @@ async def create_chat_completion(
138139 request_id ,
139140 lora_request ,
140141 )
141- except TypeError :
142+ else :
142143 result_generator = engine .model .generate (
143144 prompt if isinstance (prompt , str ) else None ,
144145 sampling_params ,
0 commit comments