17 | 17 | else:
18 | 18 |     common_model_paths = common_model_paths.split(",")
19 | 19 |
20 |    | -common_batch_sizes = [1, 8]
   | 20 | +common_batch_sizes = [1, 4]
21 | 21 | common_seq_lengths = [64]
22 |    | -common_max_new_tokens = [12]
   | 22 | +common_max_new_tokens = [8]
23 | 23 | common_attn_types = ["sdpa", "paged"]
24 | 24 |
25 | 25 | common_params = list(
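The constants above feed the test parameter grid; the body of `common_params = list(` continues in the collapsed lines 26-35, so its exact construction is not visible in this hunk. A minimal sketch of how such a grid is commonly built, assuming a plain `itertools.product` over the lists shown (the model path value below is hypothetical):

```python
# Sketch only: the real common_params construction sits in the collapsed
# lines 26-35 of the diff; this assumes a simple cross-product of the lists.
import itertools

common_model_paths = ["/models/example-model"]  # hypothetical placeholder
common_batch_sizes = [1, 4]
common_seq_lengths = [64]
common_max_new_tokens = [8]
common_attn_types = ["sdpa", "paged"]

common_params = list(
    itertools.product(
        common_model_paths,
        common_batch_sizes,
        common_seq_lengths,
        common_max_new_tokens,
        common_attn_types,
    )
)
```

Under that assumption, each model path expands to 2 × 1 × 1 × 2 = 4 parameter combinations after this change.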
36 | 36 |
37 | 37 |
38 | 38 | def execute_script(execute_cmd):
39 |    | -    current_env["MAX_SHAREDPROG_ITERS"] = f"{common_max_new_tokens[0]}"
   | 39 | +    # using these options temporarily
   | 40 | +    current_env["VLLM_DT_MAX_BATCH_TKV_LIMIT"] = "16384"
   | 41 | +    current_env["VLLM_DT_MAX_BATCH_SIZE"] = "4"
   | 42 | +    current_env["VLLM_DT_MAX_CONTEXT_LEN"] = "4096"
40 | 43 |
41 | 44 |     with Popen(
42 | 45 |         execute_cmd,
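The diff replaces the old `MAX_SHAREDPROG_ITERS` override with three temporary `VLLM_DT_*` limits set on `current_env` before the command is launched. A minimal sketch of that pattern, assuming `current_env` is a copy of `os.environ` and that the remaining `Popen` arguments (not visible in this hunk) simply capture output:

```python
import os
from subprocess import PIPE, Popen

def execute_script(execute_cmd):
    # Assumption: the real test builds current_env elsewhere; here it is a
    # plain copy of the parent environment for a self-contained example.
    current_env = os.environ.copy()

    # Environment overrides added in the diff, values copied verbatim
    # (marked as temporary in the in-code comment).
    current_env["VLLM_DT_MAX_BATCH_TKV_LIMIT"] = "16384"
    current_env["VLLM_DT_MAX_BATCH_SIZE"] = "4"
    current_env["VLLM_DT_MAX_CONTEXT_LEN"] = "4096"

    # Run the command with the overridden environment and capture its output.
    with Popen(execute_cmd, env=current_env, stdout=PIPE, stderr=PIPE, text=True) as proc:
        stdout, stderr = proc.communicate()
    return stdout
```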
@@ -79,10 +82,10 @@ def execute_inference(model_path, batch_size, seq_length, max_new_tokens, attn_t
79 | 82 |
80 | 83 |
81 | 84 | common_asserts = [
82 |    | -    "### Response:\nProvide a list of instructions for preparing chicken soup",
83 |    | -    "### Response:\nExplain some popular greetings in Spanish.",
84 |    | -    "### Response:\nExplain to me why ignorance is bliss.",
85 |    | -    "### Response:\nI have just come into a very large sum of money",
   | 85 | +    "### Response:\n\n1.\n\nThe following",
   | 86 | +    "### Response:\n\n1.\n\nI am",
   | 87 | +    "### Response:\n\nI am not sure what you",
   | 88 | +    "### Response:\n\nI have just come into a",
86 | 89 | ]
87 | 90 |
88 | 91 |