Adding Compute-Context-Length (CCL)

vjanfaza · vjanfaza · commit 5410733fa417 · 2025-10-16T23:29:20.000-07:00
Signed-off-by: Vahid Janfaza <vjanfaza@qti.qualcomm.com>
diff --git a/examples/ccl_mistral3_example.py b/examples/ccl_mistral3_example.py
@@ -38,12 +38,13 @@ def run_model(
     config = AutoConfig.from_pretrained(model_name)
     config.vision_config._attn_implementation = "eager"
 
-    model = QEFFAutoModelForImageTextToText.from_pretrained(model_name, 
-        kv_offload=kv_offload, 
+    model = QEFFAutoModelForImageTextToText.from_pretrained(
+        model_name,
+        kv_offload=kv_offload,
         config=config,
         ctx_len=ctx_len,
         comp_ctx_lengths_prefill=comp_ctx_lengths_prefill,
-        comp_ctx_lengths_decode=comp_ctx_lengths_decode
+        comp_ctx_lengths_decode=comp_ctx_lengths_decode,
     )
 
     ## STEP - 2 Export & Compile the Model
diff --git a/examples/ccl_qwen2_5_vl_example.py b/examples/ccl_qwen2_5_vl_example.py
@@ -24,16 +24,16 @@
 ctx_len = 32768
 
 comp_ctx_lengths_prefill = [4000]
-comp_ctx_lengths_decode = [4096, 8192,16384, ctx_len]
+comp_ctx_lengths_decode = [4096, 8192, 16384, ctx_len]
 
 qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
-    model_id, 
+    model_id,
     comp_ctx_lengths_prefill=comp_ctx_lengths_prefill,
     comp_ctx_lengths_decode=comp_ctx_lengths_decode,
     ctx_len=ctx_len,
-    attn_implementation="eager", 
-    kv_offload=True, 
-    config=config
+    attn_implementation="eager",
+    kv_offload=True,
+    config=config,
 )
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
 processor = AutoProcessor.from_pretrained(model_id)
diff --git a/examples/compute_context_length.py b/examples/compute_context_length.py
@@ -17,7 +17,7 @@
 
 ctx_len = 1024
 comp_ctx_lengths_prefill = [256]
-comp_ctx_lengths_decode = [512,ctx_len]
+comp_ctx_lengths_decode = [512, ctx_len]
 
 # model_name = "google/gemma-7b"
 # model_name = "google/gemma-2-2b"
@@ -57,5 +57,5 @@
         "My name is ",
     ],
     tokenizer=tokenizer,
-    generation_len=128
+    generation_len=128,
 )
diff --git a/examples/qwen3moe_example/ccl_qwen3moe_inference.py b/examples/qwen3moe_example/ccl_qwen3moe_inference.py
@@ -19,7 +19,7 @@
 ctx_len = 8192
 
 comp_ctx_lengths_prefill = [4096]
-comp_ctx_lengths_decode = [6144,8192]
+comp_ctx_lengths_decode = [6144, 8192]
 
 model = QEFFAutoModelForCausalLM.from_pretrained(
     model_name,