Commit fbc369c

ywang96 authored and bigPYJ1151 committed
[Bugfix][Perf] Revert applying HF processor on text-only inputs for multimodal models (#28858)
Signed-off-by: Roger Wang <hey@rogerw.io>
Signed-off-by: jiang1.li <jiang1.li@intel.com>
1 parent 72d3f5e commit fbc369c

File tree

2 files changed: 11 additions, 38 deletions

tests/test_inputs.py

Lines changed: 7 additions & 28 deletions

@@ -86,34 +86,6 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
     assert zipped["mm_processor_kwargs"] == exp_kwargs
 
 
-@pytest.mark.parametrize(
-    "model_id",
-    [
-        "facebook/opt-125m",
-    ],
-)
-@pytest.mark.parametrize(
-    "prompt",
-    [
-        {
-            "prompt": "",
-            "multi_modal_data": {"dummy": []},
-        },
-        {
-            "prompt_token_ids": [],
-            "multi_modal_data": {"dummy": []},
-        },
-    ],
-)
-def test_preprocessor_text_no_mm_inputs(model_id, prompt):
-    model_config = ModelConfig(model=model_id)
-    tokenizer = init_tokenizer_from_configs(model_config)
-    input_preprocessor = InputPreprocessor(model_config, tokenizer)
-
-    with pytest.raises(ValueError, match="does not support multimodal inputs"):
-        input_preprocessor.preprocess(prompt)
-
-
 @pytest.mark.parametrize(
     "model_id",
     [
@@ -127,6 +99,13 @@ def test_preprocessor_text_no_mm_inputs(model_id, prompt):
         {"prompt_token_ids": []},
     ],
 )
+@pytest.mark.skip(
+    reason=(
+        "Applying huggingface processor on text inputs results in "
+        "significant performance regression for multimodal models. "
+        "See https://github.com/vllm-project/vllm/issues/26320"
+    )
+)
 def test_preprocessor_always_mm_code_path(model_id, prompt):
     model_config = ModelConfig(model=model_id)
     tokenizer = init_tokenizer_from_configs(model_config)
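Aside: the pytest.mark.skip marker added above excludes the test at collection time and surfaces the reason string in the report. A minimal standalone sketch of the pattern (the test name and reason here are illustrative, not vLLM code):

import pytest


@pytest.mark.skip(
    reason="Skipped pending https://github.com/vllm-project/vllm/issues/26320"
)
def test_always_mm_code_path():
    # Never executed; pytest reports this as skipped with the reason above.
    raise AssertionError("unreachable while the skip marker is in place")

Running pytest with -rs prints skip reasons in the summary, which keeps the link to the tracking issue visible on every run.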

vllm/inputs/preprocess.py

Lines changed: 4 additions & 10 deletions

@@ -348,18 +348,15 @@ def _process_tokens(
         )
 
         inputs: TokenInputs | MultiModalInputs
-        if self.model_config.is_multimodal_model:
+        if multi_modal_data := parsed_content.get("multi_modal_data"):
             inputs = self._process_multimodal(
                 prompt_token_ids,
-                parsed_content.get("multi_modal_data") or {},
+                multi_modal_data,
                 parsed_content.get("mm_processor_kwargs") or {},
                 tokenization_kwargs=tokenization_kwargs,
                 mm_uuids=mm_uuids,
             )
         else:
-            if parsed_content.get("multi_modal_data"):
-                raise ValueError("This model does not support multimodal inputs")
-
             inputs = token_inputs(prompt_token_ids)
 
         if cache_salt := parsed_content.get("cache_salt"):
@@ -377,18 +374,15 @@ def _process_text(
         prompt_text = parsed_content["prompt"]
 
         inputs: TokenInputs | MultiModalInputs
-        if self.model_config.is_multimodal_model:
+        if multi_modal_data := parsed_content.get("multi_modal_data"):
             inputs = self._process_multimodal(
                 prompt_text,
-                parsed_content.get("multi_modal_data") or {},
+                multi_modal_data,
                 parsed_content.get("mm_processor_kwargs") or {},
                 tokenization_kwargs=tokenization_kwargs,
                 mm_uuids=mm_uuids,
             )
         else:
-            if parsed_content.get("multi_modal_data"):
-                raise ValueError("This model does not support multimodal inputs")
-
             prompt_token_ids = self._tokenize_prompt(
                 prompt_text,
                 tokenization_kwargs=tokenization_kwargs,
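Both hunks apply the same routing rule: a request takes the multimodal path only when it actually carries multi_modal_data, rather than whenever the model itself is multimodal, so plain-text prompts to multimodal models skip the HF processor entirely. A minimal self-contained sketch of that rule (route_prompt and its return strings are illustrative stand-ins, not vLLM APIs):

def route_prompt(parsed_content: dict) -> str:
    # The walrus operator binds and truth-tests in one step: a missing key
    # and an empty dict are both falsy, so either falls through to the
    # plain tokenization path.
    if multi_modal_data := parsed_content.get("multi_modal_data"):
        return f"multimodal path: {sorted(multi_modal_data)}"
    return "text-only path (HF processor skipped)"


print(route_prompt({"prompt": "Hello"}))
# text-only path (HF processor skipped)
print(route_prompt({"prompt": "<image> Describe", "multi_modal_data": {"image": ["img0"]}}))
# multimodal path: ['image']

Note that the old explicit guard (raising ValueError when a text-only model receives multi_modal_data) is gone from this layer, which is why the test asserting that exact error was removed above; such inputs now flow into _process_multimodal rather than being rejected up front.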
