Improving handling of CCL lists

vjanfaza · vjanfaza · commit 71c5182651da · 2025-10-21T18:53:54.000-07:00
Signed-off-by: Vahid Janfaza <vjanfaza@qti.qualcomm.com>
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
@@ -927,7 +927,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
 
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
 
-        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(kwargs)
+        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(
+            kwargs
+        )
 
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
         return cls(
@@ -1534,7 +1536,9 @@ def from_pretrained(
 
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
 
-        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(kwargs)
+        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(
+            kwargs
+        )
 
         from transformers import AutoConfig
 
@@ -2088,7 +2092,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona
 
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
 
-        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(kwargs)
+        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(
+            kwargs
+        )
 
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
         return cls(
@@ -2294,7 +2300,9 @@ def from_pretrained(
 
         kv_offload = kwargs.pop("kv_offload", None)
 
-        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(kwargs)
+        comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len = process_ccl_specializations(
+            kwargs
+        )
 
         kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
diff --git a/QEfficient/utils/check_ccl_specializations.py b/QEfficient/utils/check_ccl_specializations.py
@@ -5,15 +5,11 @@
 #
 # -----------------------------------------------------------------------------
 
-from typing import List, Optional
-
 
 # def process_ccl_specializations(
 #     ccl_prefill: Optional[List[int]] = None, ccl_decode: Optional[List[int]] = None, ctx_len: Optional[int] = None
 # ):
-def process_ccl_specializations(
-    kwargs
-):
+def process_ccl_specializations(kwargs):
     ccl_prefill = kwargs.pop("comp_ctx_lengths_prefill", None)
     ccl_decode = kwargs.pop("comp_ctx_lengths_decode", None)
     ctx_len = kwargs.pop("ctx_len", None)
@@ -24,9 +20,9 @@ def process_ccl_specializations(
 
     if ccl_prefill is None or ccl_decode is None:
         return None, None, ctx_len, prefill_seq_len
-    
+
     if prefill_seq_len == 1:
-        #both prefill and decode ccl can share the same specializations since prefill_seq_len=1. So, a sorted union of both lists can be used for both of them.
+        # both prefill and decode ccl can share the same specializations since prefill_seq_len=1. So, a sorted union of both lists can be used for both of them.
         ccl_union_all = sorted(set(ccl_prefill + ccl_decode))
         ccl_union_all = [min(x, ctx_len) for x in ccl_union_all]
         return ccl_union_all, ccl_union_all, ctx_len, prefill_seq_len