
Commit 65a76bd

Adding support for multimodal models in vLLM with CCL
Signed-off-by: Vahid Janfaza <vjanfaza@qti.qualcomm.com>
1 parent: 2d137f9

File tree

1 file changed: +8 -0 lines


QEfficient/transformers/models/modeling_auto.py

Lines changed: 8 additions & 0 deletions
@@ -1094,6 +1094,10 @@ def compile(
             raise ValueError("Expected at least one of 'skip_lang' or 'skip_vision' to be False")
 
         output_names = self.model.get_output_names(kv_offload=True)
+        # For supporting VLLM and Disaggregated with CCL
+        if "comp_ctx_lengths_prefill" in compiler_options:
+            self.comp_ctx_lengths_prefill = compiler_options.pop("comp_ctx_lengths_prefill")
+            self.comp_ctx_lengths_decode = compiler_options.pop("comp_ctx_lengths_decode")
 
         specializations, compiler_options = self.model.get_specializations(
             batch_size=batch_size,
@@ -1652,6 +1656,10 @@ def compile(
         )
 
         output_names = self.model.get_output_names()
+        # For supporting VLLM and Disaggregated with CCL
+        if "comp_ctx_lengths_prefill" in compiler_options:
+            self.comp_ctx_lengths_prefill = compiler_options.pop("comp_ctx_lengths_prefill")
+            self.comp_ctx_lengths_decode = compiler_options.pop("comp_ctx_lengths_decode")
 
         # Get specializations from modelling file
         # TODO: expose this via the auto class as well
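
For context, a minimal usage sketch of how the new CCL (compute-context-length) options could be passed at compile time, assuming compile() collects extra keyword arguments into compiler_options as the surrounding code suggests. The import path, class name, checkpoint, and bucket values below are illustrative assumptions, not taken from this commit.

# Hypothetical sketch: class, checkpoint, and CCL bucket values are
# assumptions for illustration only.
from QEfficient import QEFFAutoModelForImageTextToText

# Load a multimodal (image-text-to-text) model through the QEfficient auto class.
model = QEFFAutoModelForImageTextToText.from_pretrained(
    "llava-hf/llava-1.5-7b-hf",  # illustrative multimodal checkpoint
)

# Extra keyword arguments reach compile() as compiler_options; per the diff,
# the two comp_ctx_lengths_* keys are popped out before the remaining options
# are forwarded to get_specializations(), so they can be passed alongside the
# usual compiler options.
model.compile(
    prefill_seq_len=128,
    ctx_len=4096,
    comp_ctx_lengths_prefill=[1024, 2048, 4096],  # illustrative CCL buckets for prefill
    comp_ctx_lengths_decode=[2048, 4096],         # illustrative CCL buckets for decode
)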
