Skip to content

Commit 4fac443

Browse files
committed
Update the test script
Signed-off-by: Vahid Janfaza <vjanfaza@qti.qualcomm.com>
1 parent 65a76bd commit 4fac443

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

examples/compute_context_length.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@
1616
## - The second comp_ctx_lengths_decode list will be used for decoding. During the decoding process, based on the position_id or cache index it will work with the specific compute-context-length in the list. It will start from a proper compute-context-length in the list based on input prompt length and will gradually increase the compute-context-length if the cache index passes the current compute-context-length. ##
1717

1818
ctx_len = 1024
19-
comp_ctx_lengths_prefill = [256]
20-
comp_ctx_lengths_decode = [512, ctx_len]
19+
comp_ctx_lengths_prefill = [256] # None
20+
comp_ctx_lengths_decode = [ctx_len] # None
2121

2222
# model_name = "google/gemma-7b"
2323
# model_name = "google/gemma-2-2b"
@@ -27,28 +27,31 @@
2727
# model_name = "microsoft/phi-1_5"
2828
# model_name = "microsoft/Phi-3-mini-4k-instruct"
2929
# model_name = "Qwen/Qwen2.5-7B-Instruct"
30-
model_name = "meta-llama/Llama-3.2-1B"
30+
# model_name = "meta-llama/Llama-3.2-1B"
3131
# model_name = "Qwen/Qwen3-1.7B"
3232
# model_name = "allenai/OLMo-2-0425-1B"
33-
# model_name = "ibm-granite/granite-3.3-2b-base"
33+
model_name = "ibm-granite/granite-3.3-2b-base"
34+
# model_name = "ibm-granite/granite-3.2-8b-instruct"
3435
# model_name = "meta-llama/Llama-3.3-70B-Instruct"
3536
# model_name = "Salesforce/codegen-350M-mono"
3637
# model_name = "tiiuae/falcon-7b-instruct"
3738
# model_name = "openai-community/gpt2"
3839
# model_name = "EleutherAI/gpt-j-6b"
39-
# model_name = "EleutherAI/gpt-j-6b"
4040

4141
model = QEFFAutoModelForCausalLM.from_pretrained(
4242
model_name,
4343
continuous_batching=True,
44+
comp_ctx_lengths_prefill=comp_ctx_lengths_prefill,
45+
comp_ctx_lengths_decode=comp_ctx_lengths_decode,
46+
ctx_len=ctx_len,
4447
)
4548

4649
# model compilation for either continuous or static batching. For continuous batching full_batch_size is needed.
4750
model.compile(
4851
prefill_seq_len=128,
4952
ctx_len=ctx_len,
5053
num_cores=16,
51-
num_devices=1,
54+
num_devices=4,
5255
full_batch_size=1,
5356
mxint8_kv_cache=True,
5457
mxfp6_matmul=True,

0 commit comments

Comments (0)