Skip to content

Commit 2542e3d

Browse files
committed
adding Compute Context Length (CCL) specialization
Signed-off-by: Vahid Janfaza <vjanfaza@qti.qualcomm.com>
1 parent a29319a commit 2542e3d

File tree

4 files changed

+17
-9
lines changed

4 files changed

+17
-9
lines changed

examples/ccl_image_text_to_text_inference.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ def run_model(
9191
if __name__ == "__main__":
9292
# Model name and Input parameters
9393
model_name = "llava-hf/llava-1.5-7b-hf"
94-
# model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
9594
query = "Describe this image."
9695
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
9796

@@ -101,10 +100,9 @@ def run_model(
101100
ctx_len = 8192
102101
generation_len = 128
103102
img_size = 336
104-
# img_size = 560
105103
num_cores = 16
106104
num_devices = 4
107-
comp_ctx_lengths = [4096,6144,8192]
105+
comp_ctx_lengths = [4096, 6144, 8192]
108106
prefill_ccl_len = 1
109107

110108
run_model(

examples/compute_context_length.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,5 @@
4444
prompts=[
4545
"My name is ",
4646
],
47-
tokenizer=tokenizer
47+
tokenizer=tokenizer,
4848
)

examples/granite_example/ccl_granitemoe_inference.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,22 @@
1616
# We will use prompt_len=1 for compilation for both cb and non-cb inference
1717
"""
1818

19-
comp_ctx_lengths = [256,512,1024,2048] # None
19+
comp_ctx_lengths = [256, 512, 1024, 2048] # None
2020

2121
## Prefill_ccl_len shows how many values in the comp_ctx_lengths list are used for prefilling; the rest are used for decoding. The default value is 1.
2222
prefill_ccl_len = 2
2323

24-
model = QEFFAutoModelForCausalLM.from_pretrained(model_name, comp_ctx_lengths=comp_ctx_lengths, prefill_ccl_len=prefill_ccl_len, continuous_batching=False)
25-
model.compile(prefill_seq_len=1, ctx_len=2048, full_batch_size=1, num_cores=16, num_devices=4, mxfp6_matmul=False, mxint8_kv_cache=False)
24+
model = QEFFAutoModelForCausalLM.from_pretrained(
25+
model_name, comp_ctx_lengths=comp_ctx_lengths, prefill_ccl_len=prefill_ccl_len, continuous_batching=False
26+
)
27+
model.compile(
28+
prefill_seq_len=1,
29+
ctx_len=2048,
30+
full_batch_size=1,
31+
num_cores=16,
32+
num_devices=4,
33+
mxfp6_matmul=False,
34+
mxint8_kv_cache=False,
35+
)
2636
tokenizer = AutoTokenizer.from_pretrained(model_name)
27-
exec_info = model.generate(prompts=Constants.INPUT_STR, tokenizer=tokenizer, device_id=[16,17,18,19])
37+
exec_info = model.generate(prompts=Constants.INPUT_STR, tokenizer=tokenizer, device_id=[16, 17, 18, 19])

tests/transformers/test_comp_ctx_length.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,4 @@ def test_causal_lm_compile(config, cb, tmp_cache):
173173
end = perf_counter()
174174
compile_time = end - start
175175
assert compile_time < 2.0
176-
assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))
176+
assert os.path.isfile(os.path.join(os.path.dirname(qeff_model.qpc_path), "qconfig.json"))

0 commit comments

Comments (0)