Commit cc295da

wip
Signed-off-by: Laith Sakka <lsakka@meta.com>
1 parent bcc0f99 commit cc295da

7 files changed: +328 -18 lines changed
Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import gc

import pytest
import torch
from torch.torch_version import TorchVersion

from vllm import LLM, SamplingParams
from vllm.config.compilation import DynamicShapesType


def cleanup_gpu_memory():
    """Clean up GPU memory after each test."""
    gc.collect()  # Clear Python objects
    torch.cuda.empty_cache()  # Clear PyTorch GPU memory cache
    torch.cuda.synchronize()  # Wait for all GPU operations to complete


def get_test_models():
    """Get the list of models to test based on the PyTorch version."""
    result = ["microsoft/DialoGPT-small", "gpt2", "facebook/opt-125m"]
    # Parse the PyTorch version, stripping local build and pre-release
    # suffixes (e.g. "+cu121", "a0", "b1", "rc1").
    version_parts = torch.__version__.split('+')[0].split('a')[0]
    clean_version = version_parts.split('b')[0].split('rc')[0]
    if TorchVersion(clean_version) >= TorchVersion("2.10"):
        # These models require fixes only available in PyTorch 2.10+.
        result.append("Qwen/Qwen2-1.5B-Instruct")
        result.append("Qwen/Qwen2-7B-Instruct")
        result.append("openlm-research/open_llama_13b")

    return result


@pytest.mark.parametrize("model_name", get_test_models())
def test_dynamic_shapes_compilation(monkeypatch, model_name):
    """Test that every dynamic shapes type compiles and generates text."""
    print(f"\nTesting model: {model_name}")

    # monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
    monkeypatch.setenv("TOKENIZERS_PARALLELISM", "true")

    prompt = "Hello, my name is"
    results = {}

    print("Testing EAGER (no compilation) baseline...")
    cleanup_gpu_memory()

    eager_model = LLM(
        model=model_name,
        compilation_config={
            "level": 0,  # NO_COMPILATION - eager mode
        },
        # gpu_memory_utilization=0.2,
    )

    # Generate text with deterministic sampling parameters
    sampling_params = SamplingParams(
        max_tokens=10,
        temperature=0.0,  # Deterministic generation
        seed=42,  # Fixed seed for consistency
    )
    eager_output = eager_model.generate(prompt,
                                        sampling_params=sampling_params)
    results["EAGER"] = eager_output[0].outputs[0].text

    # Cleanup model
    del eager_model
    cleanup_gpu_memory()

    # Test all dynamic shapes types with compilation
    for shapes_type in [
            DynamicShapesType.BACKED, DynamicShapesType.UNBACKED,
            DynamicShapesType.BACKED_SIZE_OBLIVIOUS
    ]:
        print(f"Testing {shapes_type.name} dynamic shapes...")

        # Initialize the model with the specific dynamic shapes configuration
        model = LLM(
            model=model_name,
            compilation_config={
                "level": 3,  # PIECEWISE compilation
                "dynamic_shapes_config": {
                    "dynamic_shapes_type": shapes_type.value,
                    "eval_dynamo_ds_guards": False,
                },
            },
            # gpu_memory_utilization=0.2,
        )

        output = model.generate(prompt, sampling_params=sampling_params)

        # Store results for comparison
        results[shapes_type.name] = output[0].outputs[0].text

        # Cleanup model
        del model
        cleanup_gpu_memory()

    # Verify all results are non-empty strings
    for shape_type, result in results.items():
        assert isinstance(result, str), f"{shape_type} should return a string"
        assert len(
            result.strip()) > 0, f"{shape_type} should generate non-empty text"

    # Print results
    for shape_type, result in results.items():
        print(f"{shape_type}: '{result}'")


if __name__ == "__main__":
    # Run the test directly as a Python script.
    import os

    print("Running dynamic shapes compilation test...")

    # Get test models based on PyTorch version
    test_models = get_test_models()
    print(f"Testing {len(test_models)} models: {test_models}")

    # Create a mock monkeypatch object for environment variables
    class MockMonkeypatch:

        def setenv(self, key, value):
            os.environ[key] = value

    monkeypatch = MockMonkeypatch()

    # Run the test for each model
    for model_name in test_models:
        try:
            print(f"\n{'='*60}")
            print(f"Testing model: {model_name}")
            print(f"{'='*60}")

            test_dynamic_shapes_compilation(monkeypatch, model_name)

            print(f"✅ Test passed for {model_name}")

        except Exception as e:
            print(f"❌ Test failed for {model_name}: {e}")
            raise

    print("\n🎉 All tests completed successfully!")

vllm/compilation/decorators.py

Lines changed: 22 additions & 6 deletions
@@ -12,6 +12,7 @@
 from vllm.compilation.counter import compilation_counter
 from vllm.compilation.wrapper import TorchCompileGuardsStripWrapper
 from vllm.config import CompilationLevel, VllmConfig
+from vllm.config.compilation import DynamicShapesType
 from vllm.logger import init_logger
 from vllm.sequence import IntermediateTensors
 from vllm.utils import supports_dynamo
@@ -78,6 +79,7 @@ def support_torch_compile(
     *,
     dynamic_arg_dims: Optional[dict[str, Union[int, list[int]]]] = None,
     enable_if: Optional[Callable[[VllmConfig], bool]] = None,
+    shape_invariants: Callable[..., None] = lambda *args, **kwargs: None
 ) -> Union[Callable[[_T], _T], _T]:
     """
     A decorator to add support for compiling the forward method of a class.
@@ -164,7 +166,7 @@ def cls_decorator_helper(cls: _T) -> _T:
             raise ValueError(
                 f"Argument {k} not found in the forward method of {cls}")
         return _support_torch_compile(cls, inferred_dynamic_arg_dims,
-                                      enable_if)
+                                      enable_if, shape_invariants)

     if cls is not None:
         # use `support_torch_compile` as a decorator without arguments
@@ -178,7 +180,8 @@ def _support_torch_compile(
     cls: _T,
     dynamic_arg_dims: dict[str, Union[int, list[int]]],
     enable_if: Optional[Callable[[VllmConfig], bool]] = None,
-) -> _T:
+    shape_invariants: Callable[...,
+                               None] = lambda *args, **kwargs: None) -> _T:
     """
     A decorator to add support for compiling the forward method of a class.
     """
@@ -209,31 +212,41 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = '', **kwargs):
         if self.do_not_compile:
             return

+        self._check_shape_invariants = shape_invariants
+
         compilation_counter.num_models_seen += 1
         TorchCompileGuardsStripWrapper.__init__(self)

     cls.__init__ = __init__

-    def _mark_dynamic_inputs(mod, *args, **kwargs):
+    def _mark_dynamic_inputs(mod, dynamic_shapes_type, *args, **kwargs):
+
+        def mark_dynamic(arg, dims):
+            if dynamic_shapes_type == DynamicShapesType.UNBACKED:
+                torch._dynamo.decorators.mark_unbacked(arg, dims)
+            else:
+                torch._dynamo.mark_dynamic(arg, dims)
+
         sig = inspect.signature(mod.__class__.forward)
         bound_args = sig.bind(mod, *args, **kwargs)
         bound_args.apply_defaults()
         for k, dims in dynamic_arg_dims.items():
             arg = bound_args.arguments.get(k)
+
             if arg is not None:
                 dims = [dims] if isinstance(dims, int) else dims
                 if isinstance(arg, torch.Tensor):
                     # In case dims is specified with negative indexing
                     dims = [arg.ndim + dim if dim < 0 else dim for dim in dims]
-                    torch._dynamo.mark_dynamic(arg, dims)
+                    mark_dynamic(arg, dims)
                 elif isinstance(arg, IntermediateTensors):
                     for tensor in arg.tensors.values():
                         # In case dims is specified with negative indexing
                         dims = [
                             tensor.ndim + dim if dim < 0 else dim
                             for dim in dims
                         ]
-                        torch._dynamo.mark_dynamic(tensor, dims)
+                        mark_dynamic(tensor, dims)
                 else:
                     raise ValueError(
                         "Unsupported dynamic dimensions"
@@ -251,8 +264,11 @@ def __call__(self, *args, **kwargs):
             return TorchCompileGuardsStripWrapper.__call__(
                 self, *args, **kwargs)

+        _mark_dynamic_inputs(
+            self, self.vllm_config.compilation_config.dynamic_shapes_config.
+            dynamic_shapes_type, *args, **kwargs)
+
         # This is the path for the first compilation.
-        _mark_dynamic_inputs(self, *args, **kwargs)

         # the first compilation needs to have dynamic shapes marked
         start_monitoring_torch_compile(self.vllm_config)
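
The decorator now threads an optional shape_invariants callable through _support_torch_compile and stores it on the instance as _check_shape_invariants; the wrapper (next file) invokes it with the same arguments that reach forward. A hypothetical usage sketch, not part of this commit; the model, argument names, and assertions are illustrative only.

# Hypothetical usage sketch of the new shape_invariants hook; illustrative only.
import torch
from torch import nn

from vllm.compilation.decorators import support_torch_compile
from vllm.config import VllmConfig


def _token_shape_invariants(input_ids: torch.Tensor,
                            positions: torch.Tensor) -> None:
    # Receives the same arguments as forward (minus the module itself) and
    # raises eagerly instead of relying on the stripped dynamo guards.
    assert input_ids.ndim == 1, "expected flattened token ids"
    assert positions.shape[0] == input_ids.shape[0]


@support_torch_compile(shape_invariants=_token_shape_invariants)
class ToyModel(nn.Module):

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()
        self.embed = nn.Embedding(32000, 64)

    def forward(self, input_ids: torch.Tensor,
                positions: torch.Tensor) -> torch.Tensor:
        return self.embed(input_ids) + positions.unsqueeze(-1).float()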

vllm/compilation/wrapper.py

Lines changed: 10 additions & 1 deletion
@@ -22,6 +22,11 @@ class TorchCompileGuardsStripWrapper:
     (Since we drop all guards)
     """

+    def check_invariantes_and_forward(self, *args, **kwargs):
+        self._check_shape_invariants(*args, **kwargs)
+
+        return self.forward(*args, **kwargs)
+
     def __init__(self):
         self.compiled = False

@@ -42,7 +47,7 @@ def __init__(self):
         options["guard_filter_fn"] = lambda x: [False for _ in x]

         self._compiled_callable = torch.compile(
-            self.forward,
+            self.check_invariantes_and_forward,
             fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
             backend=backend,
             options=options,
@@ -54,9 +59,13 @@ def __call__(self, *args, **kwargs):
         method, for directly dispatching to the compiled code.
         """
         if not self.compiled:
+            # We check eagerly on the first compile as well.
+            self.check_invariantes_and_forward(*args, **kwargs)
+
             # Make sure a compilation is triggered by clearing dynamo cache.
             torch._dynamo.eval_frame.remove_from_cache(
                 self.original_code_object())
+
             self.compiled = True

         # Disable the C++ compilation of symbolic shape guards. C++-fication
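
Since the wrapper keeps stripping every dynamo guard via guard_filter_fn, the explicit call inside check_invariantes_and_forward becomes the only runtime shape check. Below is a standalone sketch of the same pattern outside vLLM, assuming a recent PyTorch where torch.compile accepts guard_filter_fn in options (as the context lines above already do); the function and shapes are illustrative.

# Standalone illustration of "strip all guards, check invariants explicitly".
# Assumes a PyTorch version that supports the guard_filter_fn compile option.
import torch


def check_invariants(x: torch.Tensor) -> None:
    assert x.ndim == 2, "expected a 2-D input"


def forward(x: torch.Tensor) -> torch.Tensor:
    return torch.relu(x @ x.T)


def check_invariants_and_forward(x: torch.Tensor) -> torch.Tensor:
    check_invariants(x)
    return forward(x)


compiled = torch.compile(
    check_invariants_and_forward,
    # Drop every guard dynamo would otherwise install, mirroring
    # TorchCompileGuardsStripWrapper.
    options={"guard_filter_fn": lambda guards: [False for _ in guards]},
)

x = torch.randn(8, 16)
torch._dynamo.mark_dynamic(x, 0)  # the batch-like dim is dynamic
print(compiled(x).shape)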

vllm/config/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1859,8 +1859,8 @@ class PoolerConfig:
     """
     Maximum input length allowed for embedding generation. When set, allows
     inputs longer than max_embed_len to be accepted for embedding models.
-    When an input exceeds max_embed_len, it will be handled according to
-    the original max_model_len validation logic.
+    When an input exceeds max_embed_len, it will be handled according to
+    the original max_model_len validation logic.
     Defaults to None (i.e. set to max_model_len).
     """
