🎨 Fix formatting

gkumbhat · gkumbhat · commit e7c18f851994 · 2025-07-30T10:37:06.000-05:00
Signed-off-by: gkumbhat <kumbhat.gaurav@gmail.com>
diff --git a/aiu_fms_testing_utils/utils/__init__.py b/aiu_fms_testing_utils/utils/__init__.py
@@ -67,6 +67,19 @@ def warmup_model(
     dprint(f"PT compile complete, took {pt_compile_model_time:.3f}s")
 
 
+def get_env_to_int_list(env_var_name, default):
+    """Return the comma-separated value of env var `env_var_name` as a list of ints; if the
+    variable is unset, `default` is used instead (a list default is returned as-is, unconverted).
+    """
+    env_var_string = os.environ.get(env_var_name, default=default)
+    if not env_var_string:
+        return []  # variable is empty, or unset with a falsy default
+    if isinstance(env_var_string, list):
+        return env_var_string  # environment values are strings, so a list can only be the default
+    # NOTE(review): str.split() yields only strings, so the isinstance filter below never drops a value
+    return [int(v) for v in env_var_string.split(",") if not isinstance(v, int)]
+
+
 def ids_for_prompt(prompt, tokenizer):
     tokens = tokenizer.tokenize(prompt)
     ids = tokenizer.convert_tokens_to_ids(tokens)
diff --git a/tests/testing/test_compilation.py b/tests/testing/test_compilation.py
@@ -1,22 +1,25 @@
 """This module contains test related to compilation operation"""
 
 # Standard
-import itertools
 import os
 import pytest
 import time
 
 # Third Party
-from torch import distributed as dist
 import torch
 
 # First Party
 from fms.models import get_model
-from fms.utils import generation, tokenizers
+from fms.utils import tokenizers
 from fms.utils.generation import pad_input_ids
 
 # Local
-from aiu_fms_testing_utils.utils import ids_for_prompt, sample_sharegpt_requests, warmup_model
+from aiu_fms_testing_utils.utils import (
+    ids_for_prompt,
+    get_env_to_int_list,
+    sample_sharegpt_requests,
+    warmup_model,
+)
 from aiu_fms_testing_utils.utils.aiu_setup import dprint
 
 GRANITE_3p3_8B_INSTRUCT = "ibm-granite/granite-3.3-8b-instruct"
@@ -26,25 +29,39 @@
 
 ATTN_NAME = "spyre_paged_attn"
 
-compile_dynamic_sendnn = True
+COMPILE_DYNAMIC_SHAPE = True
+
+
+common_model_paths = get_env_to_int_list("COMMON_MODEL_NAME", [GRANITE_3p3_8B_INSTRUCT])
+common_batch_sizes = get_env_to_int_list("FMS_TEST_SHAPES_COMMON_BATCH_SIZES", [1])
+common_seq_lengths = get_env_to_int_list("FMS_TEST_SHAPES_COMMON_SEQ_LENGTHS", [64])
+common_max_new_tokens = get_env_to_int_list(
+    "FMS_TEST_SHAPES_COMMON_MAX_NEW_TOKENS", [64]
+)
+common_expected_comp_time = get_env_to_int_list(
+    "COMMON_COMPILATION_EXPECTED_TIME", [10]
+)  # In minutes
 
-common_model_paths = [GRANITE_3p3_8B_INSTRUCT]
-common_batch_sizes = [1]
-common_seq_lengths = [256]
-common_shape_types = ["dynamic"]
-common_max_new_tokens = [128]
-common_expected_comp_time = [10] # In minutes
+COMMON_SHAPE_TYPE = "dynamic"
 
-if compile_dynamic_sendnn:
+
+if COMPILE_DYNAMIC_SHAPE:
+    import bisect
+
+    # The compiler only supports certain max context lengths (VLLM_DT_MAX_CONTEXT_LEN).
+    # Select the smallest supported value that fits the largest possible context (prompt length + max_new_tokens).
+    __largest_context = max(common_seq_lengths) + max(common_max_new_tokens)
+    __supported_context_lengths = [256, 512, 1024, 2048, 4096, 8192]
     os.environ["VLLM_DT_MAX_CONTEXT_LEN"] = str(
-        (((max(common_seq_lengths) + max(common_max_new_tokens)) // 64) + 1) * 64
+        __supported_context_lengths[
+            bisect.bisect_left(__supported_context_lengths, __largest_context)
+        ]
     )
     os.environ["VLLM_DT_MAX_BATCH_SIZE"] = str(max(max(common_batch_sizes), 2))
 
-common_shapes = list(
+COMMON_SHAPES = list(
     zip(
         common_model_paths,
-        common_shape_types,
         common_batch_sizes,
         common_seq_lengths,
         common_max_new_tokens,
@@ -59,7 +76,7 @@ def __prepare_inputs(batch_size, seq_length, tokenizer, seed=0):
         SHARE_GPT_DATASET_PATH,
         batch_size,
         tokenizer,
-        int(seq_length / 2),
+        seq_length // 2,
         seq_length,
         seed,
     )
@@ -74,16 +91,18 @@ def __prepare_inputs(batch_size, seq_length, tokenizer, seed=0):
 @pytest.fixture(autouse=True)
 def reset_compiler():
     yield  # run the test
-    if not compile_dynamic_sendnn:
+    if not COMPILE_DYNAMIC_SHAPE:
         torch.compiler.reset()
         torch._dynamo.reset()
         os.environ.pop("COMPILATION_MODE", None)
 
 
 @pytest.mark.parametrize(
-    "model_path,shape_type,batch_size,seq_length,max_new_tokens,expected_comp_time", common_shapes
+    "model_path,batch_size,seq_length,max_new_tokens,expected_comp_time", COMMON_SHAPES
 )
-def test_compilation_time(model_path, shape_type, batch_size, seq_length, max_new_tokens, expected_comp_time):
+def test_compilation_time(
+    model_path, batch_size, seq_length, max_new_tokens, expected_comp_time
+):
     """Test to validate time taken for model compilation."""
     torch.manual_seed(42)
     torch.set_default_dtype(torch.float16)
@@ -104,7 +123,7 @@ def test_compilation_time(model_path, shape_type, batch_size, seq_length, max_ne
     model = get_model(
         architecture="hf_pretrained",
         device_type="cpu",
-        data_type= torch.float16,
+        data_type=torch.float16,
         fused_weights=False,
         **model_path_kwargs,
     )
@@ -117,21 +136,14 @@ def test_compilation_time(model_path, shape_type, batch_size, seq_length, max_ne
     extra_kwargs["attn_name"] = ATTN_NAME
 
     start_time = time.perf_counter()
-    if shape_type == "dynamic":
-        compile_dynamic_sendnn = True
+    if COMMON_SHAPE_TYPE == "dynamic":
+        COMPILE_DYNAMIC_SHAPE = True
     else:
-        compile_dynamic_sendnn = False
+        COMPILE_DYNAMIC_SHAPE = False
 
-    model.compile(
-        backend="sendnn", options={"sendnn.dynamic": compile_dynamic_sendnn}
-    )
+    model.compile(backend="sendnn", options={"sendnn.dynamic": COMPILE_DYNAMIC_SHAPE})
     warmup_model(
-        model,
-        input_ids,
-        max_new_tokens,
-        compile_dynamic_sendnn,
-        use_cache=False,
-        **extra_kwargs
+        model, input_ids, max_new_tokens, COMPILE_DYNAMIC_SHAPE, **extra_kwargs
     )
     end_time = time.perf_counter()