Skip to content

Commit 5410733

Browse files
committed
Adding Compute-Context-Length(CCL)
Signed-off-by: Vahid Janfaza <vjanfaza@qti.qualcomm.com>
1 parent f00737f commit 5410733

File tree

4 files changed

+12
-11
lines changed

4 files changed

+12
-11
lines changed

examples/ccl_mistral3_example.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,13 @@ def run_model(
3838
config = AutoConfig.from_pretrained(model_name)
3939
config.vision_config._attn_implementation = "eager"
4040

41-
model = QEFFAutoModelForImageTextToText.from_pretrained(model_name,
42-
kv_offload=kv_offload,
41+
model = QEFFAutoModelForImageTextToText.from_pretrained(
42+
model_name,
43+
kv_offload=kv_offload,
4344
config=config,
4445
ctx_len=ctx_len,
4546
comp_ctx_lengths_prefill=comp_ctx_lengths_prefill,
46-
comp_ctx_lengths_decode=comp_ctx_lengths_decode
47+
comp_ctx_lengths_decode=comp_ctx_lengths_decode,
4748
)
4849

4950
## STEP - 2 Export & Compile the Model

examples/ccl_qwen2_5_vl_example.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,16 +24,16 @@
2424
ctx_len = 32768
2525

2626
comp_ctx_lengths_prefill = [4000]
27-
comp_ctx_lengths_decode = [4096, 8192,16384, ctx_len]
27+
comp_ctx_lengths_decode = [4096, 8192, 16384, ctx_len]
2828

2929
qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
30-
model_id,
30+
model_id,
3131
comp_ctx_lengths_prefill=comp_ctx_lengths_prefill,
3232
comp_ctx_lengths_decode=comp_ctx_lengths_decode,
3333
ctx_len=ctx_len,
34-
attn_implementation="eager",
35-
kv_offload=True,
36-
config=config
34+
attn_implementation="eager",
35+
kv_offload=True,
36+
config=config,
3737
)
3838
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
3939
processor = AutoProcessor.from_pretrained(model_id)

examples/compute_context_length.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
ctx_len = 1024
1919
comp_ctx_lengths_prefill = [256]
20-
comp_ctx_lengths_decode = [512,ctx_len]
20+
comp_ctx_lengths_decode = [512, ctx_len]
2121

2222
# model_name = "google/gemma-7b"
2323
# model_name = "google/gemma-2-2b"
@@ -57,5 +57,5 @@
5757
"My name is ",
5858
],
5959
tokenizer=tokenizer,
60-
generation_len=128
60+
generation_len=128,
6161
)

examples/qwen3moe_example/ccl_qwen3moe_inference.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
ctx_len = 8192
2020

2121
comp_ctx_lengths_prefill = [4096]
22-
comp_ctx_lengths_decode = [6144,8192]
22+
comp_ctx_lengths_decode = [6144, 8192]
2323

2424
model = QEFFAutoModelForCausalLM.from_pretrained(
2525
model_name,

0 commit comments

Comments
 (0)