17 | 17 | else:
18 | 18 |     common_model_paths = common_model_paths.split(",")
19 | 19 |
20 |    | -common_batch_sizes = [1, 8]
   | 20 | +common_batch_sizes = [1, 4]
21 | 21 | common_seq_lengths = [64]
22 |    | -common_max_new_tokens = [12]
   | 22 | +common_max_new_tokens = [8]
23 | 23 | common_attn_types = ["sdpa", "paged"]
24 | 24 |
25 | 25 | common_params = list(
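The constants above feed the test parameter grid; the body of `common_params = list(` continues in the collapsed lines 26-35, so its exact construction is not visible in this hunk. A minimal sketch of how such a grid is commonly built, assuming a plain `itertools.product` over the lists shown (the model path value below is hypothetical):

```python
# Sketch only: the real common_params construction sits in the collapsed
# lines 26-35 of the diff; this assumes a simple cross-product of the lists.
import itertools

common_model_paths = ["/models/example-model"]  # hypothetical placeholder
common_batch_sizes = [1, 4]
common_seq_lengths = [64]
common_max_new_tokens = [8]
common_attn_types = ["sdpa", "paged"]

common_params = list(
    itertools.product(
        common_model_paths,
        common_batch_sizes,
        common_seq_lengths,
        common_max_new_tokens,
        common_attn_types,
    )
)
```

Under that assumption, each model path expands to 2 × 1 × 1 × 2 = 4 parameter combinations after this change.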
36 | 36 |
37 | 37 |
38 | 38 | def execute_script(execute_cmd):
39 |    | -    current_env["MAX_SHAREDPROG_ITERS"] = f"{common_max_new_tokens[0]}"
   | 39 | +    # using these options temporarily
   | 40 | +    current_env["VLLM_DT_MAX_BATCH_TKV_LIMIT"] = "16384"
   | 41 | +    current_env["VLLM_DT_MAX_BATCH_SIZE"] = "4"
   | 42 | +    current_env["VLLM_DT_MAX_CONTEXT_LEN"] = "4096"
40 | 43 |
41 | 44 |     with Popen(
42 | 45 |         execute_cmd,
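The diff replaces the old `MAX_SHAREDPROG_ITERS` override with three temporary `VLLM_DT_*` limits set on `current_env` before the command is launched. A minimal sketch of that pattern, assuming `current_env` is a copy of `os.environ` and that the remaining `Popen` arguments (not visible in this hunk) simply capture output:

```python
import os
from subprocess import PIPE, Popen

def execute_script(execute_cmd):
    # Assumption: the real test builds current_env elsewhere; here it is a
    # plain copy of the parent environment for a self-contained example.
    current_env = os.environ.copy()

    # Environment overrides added in the diff, values copied verbatim
    # (marked as temporary in the in-code comment).
    current_env["VLLM_DT_MAX_BATCH_TKV_LIMIT"] = "16384"
    current_env["VLLM_DT_MAX_BATCH_SIZE"] = "4"
    current_env["VLLM_DT_MAX_CONTEXT_LEN"] = "4096"

    # Run the command with the overridden environment and capture its output.
    with Popen(execute_cmd, env=current_env, stdout=PIPE, stderr=PIPE, text=True) as proc:
        stdout, stderr = proc.communicate()
    return stdout
```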
@@ -79,10 +82,10 @@ def execute_inference(model_path, batch_size, seq_length, max_new_tokens, attn_t
79 | 82 |
80 | 83 |
81 | 84 | common_asserts = [
82 |    | -    "### Response:\nProvide a list of instructions for preparing chicken soup",
83 |    | -    "### Response:\nExplain some popular greetings in Spanish.",
84 |    | -    "### Response:\nExplain to me why ignorance is bliss.",
85 |    | -    "### Response:\nI have just come into a very large sum of money",
   | 85 | +    "### Response:\n\n1.\n\nThe following",
   | 86 | +    "### Response:\n\n1.\n\nI am",
   | 87 | +    "### Response:\n\nI am not sure what you",
   | 88 | +    "### Response:\n\nI have just come into a",
86 | 89 | ]
87 | 90 |
88 | 91 |