@@ -245,17 +245,13 @@ def get_diff_sampling_param(self):
         return self.diff_sampling_param or {}
 
 
-def _build_serving_chat(
-    engine: AsyncLLM, model_config: MockModelConfig
-) -> OpenAIServingChat:
+def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
     models = OpenAIServingModels(
         engine_client=engine,
         base_model_paths=BASE_MODEL_PATHS,
-        model_config=model_config,
     )
     serving_chat = OpenAIServingChat(
         engine,
-        model_config,
         models,
         response_role="assistant",
         chat_template=CHAT_TEMPLATE,
@@ -280,18 +276,17 @@ async def _fake_process_inputs(
 
 @dataclass
 class MockEngine:
-    async def get_model_config(self):
-        return MockModelConfig()
+    model_config: MockModelConfig = field(default_factory=MockModelConfig)
+    processor: MagicMock = field(default_factory=MagicMock)
+    io_processor: MagicMock = field(default_factory=MagicMock)
 
 
 async def _async_serving_chat_init():
     engine = MockEngine()
-    model_config = await engine.get_model_config()
 
-    models = OpenAIServingModels(engine, model_config, BASE_MODEL_PATHS)
+    models = OpenAIServingModels(engine, BASE_MODEL_PATHS)
     serving_completion = OpenAIServingChat(
         engine,
-        model_config,
         models,
         response_role="assistant",
         chat_template=CHAT_TEMPLATE,
@@ -311,8 +306,11 @@ async def test_serving_chat_returns_correct_model_name():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
-    serving_chat = _build_serving_chat(mock_engine, MockModelConfig())
+    serving_chat = _build_serving_chat(mock_engine)
     messages = [{"role": "user", "content": "what is 1+1?"}]
 
     async def return_model_name(*args):
@@ -338,8 +336,11 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
-    serving_chat = _build_serving_chat(mock_engine, MockModelConfig())
+    serving_chat = _build_serving_chat(mock_engine)
 
     req = ChatCompletionRequest(
         model=MODEL_NAME,
@@ -368,9 +369,12 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     # Test Case 1: No max_tokens specified in request
     req = ChatCompletionRequest(
@@ -410,9 +414,12 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     # Test case 1: No max_tokens specified, defaults to context_window
     req = ChatCompletionRequest(
@@ -453,9 +460,12 @@ async def test_serving_chat_could_load_correct_generation_config():
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
     # Initialize the serving chat
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     req = ChatCompletionRequest(
         model=MODEL_NAME,
@@ -496,8 +506,11 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
     mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
+    mock_engine.model_config = mock_model_config
+    mock_engine.processor = MagicMock()
+    mock_engine.io_processor = MagicMock()
 
-    serving_chat = _build_serving_chat(mock_engine, mock_model_config)
+    serving_chat = _build_serving_chat(mock_engine)
 
     # Test cache_salt
     req = ChatCompletionRequest(
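Every hunk above repeats the same three-attribute stubbing (`model_config`, `processor`, `io_processor`) on the mock engine before calling the slimmed-down `_build_serving_chat(engine)`. A small factory helper could centralize that boilerplate; the sketch below is not part of this commit, `_make_mock_engine` is a hypothetical name, and it assumes the imports already present in this test file (`MagicMock`, `AsyncLLM`, `get_tokenizer`, `MODEL_NAME`, `MockModelConfig`):

```python
from unittest.mock import MagicMock

def _make_mock_engine(model_config=None) -> MagicMock:
    """Hypothetical helper: build a mock AsyncLLM carrying the attributes
    the refactored _build_serving_chat now reads off the engine itself."""
    mock_engine = MagicMock(spec=AsyncLLM)
    mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
    mock_engine.errored = False
    # The serving layer now pulls these from the engine instead of taking
    # model_config as a separate constructor argument.
    mock_engine.model_config = model_config or MockModelConfig()
    mock_engine.processor = MagicMock()
    mock_engine.io_processor = MagicMock()
    return mock_engine

# Usage in any of the tests above:
#     mock_engine = _make_mock_engine(mock_model_config)
#     serving_chat = _build_serving_chat(mock_engine)
```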