File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed
QEfficient/transformers/models Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -2648,8 +2648,10 @@ def build_decode_specialization(
26482648 A dictionary defining the decode specialization, or None if it would be a duplicate
26492649 of the prefill specialization (e.g., if prefill_seq_len is 1 and not continuous batching).
26502650 """
2651- if prefill_seq_len == 1 and not self .continuous_batching :# and comp_ctx_lengths is None
2652- return None # Avoid duplication with prefill
2651+ if prefill_seq_len == 1 :
2652+ if not self .continuous_batching or batch_size == 1 :
2653+ return None # Avoid duplication with prefill
2654+
26532655 spec = {
26542656 "batch_size" : full_batch_size if self .continuous_batching else batch_size ,
26552657 "seq_len" : (num_speculative_tokens + 1 ) if self .is_tlm else 1 ,
You can’t perform that action at this time.
0 commit comments