File tree Expand file tree Collapse file tree 1 file changed +8
-0
lines changed
QEfficient/transformers/models Expand file tree Collapse file tree 1 file changed +8
-0
lines changed Original file line number Diff line number Diff line change @@ -1094,6 +1094,10 @@ def compile(
10941094 raise ValueError ("Expected at least one of 'skip_lang' or 'skip_vision' to be False" )
10951095
10961096 output_names = self .model .get_output_names (kv_offload = True )
1097+ # Support vLLM and disaggregated serving with CCL (comp_ctx_lengths): take the prefill/decode lists from compiler_options
1098+ if "comp_ctx_lengths_prefill" in compiler_options :
1099+ self .comp_ctx_lengths_prefill = compiler_options .pop ("comp_ctx_lengths_prefill" )
1100+ self .comp_ctx_lengths_decode = compiler_options .pop ("comp_ctx_lengths_decode" )
10971101
10981102 specializations , compiler_options = self .model .get_specializations (
10991103 batch_size = batch_size ,
@@ -1652,6 +1656,10 @@ def compile(
16521656 )
16531657
16541658 output_names = self .model .get_output_names ()
1659+ # Support vLLM and disaggregated serving with CCL (comp_ctx_lengths): take the prefill/decode lists from compiler_options
1660+ if "comp_ctx_lengths_prefill" in compiler_options :
1661+ self .comp_ctx_lengths_prefill = compiler_options .pop ("comp_ctx_lengths_prefill" )
1662+ self .comp_ctx_lengths_decode = compiler_options .pop ("comp_ctx_lengths_decode" )
16551663
16561664 # Get specializations from modelling file
16571665 # TODO: expose this via the auto class as well
You can’t perform that action at this time.
0 commit comments