1111from packaging import version
1212from torch ._dynamo .symbolic_convert import InliningInstructionTranslator
1313
14+ import vllm .envs as envs
1415from vllm .compilation .counter import compilation_counter
1516from vllm .compilation .wrapper import TorchCompileWrapperWithCustomDispatcher
1617from vllm .config import CompilationLevel , VllmConfig
@@ -34,11 +35,11 @@ def ignore_torch_compile(cls: _T) -> _T:
3435 a support_torch_compile decorator, but we don't want to
3536 compile the class `cls` that inherits the parent class.
3637 This only ignores compiling the forward of the class the
37- decorator is applied to.
38+ decorator is applied to.
3839
3940 If the parent has ignore_torch_compile but the child has
4041 support_torch_compile, the child will still be compiled.
41-
42+
4243 If the class has one or more submodules
4344 that have support_torch_compile decorator applied, compile will
4445 not be ignored for those submodules.
@@ -224,6 +225,9 @@ def __call__(self, *args, **kwargs):
224225 if self .do_not_compile or torch .compiler .is_compiling ():
225226 return self .forward (* args , ** kwargs )
226227
228+ if getattr (self , "aot_compiled_fn" , None ) is not None :
229+ return self .aot_compiled_fn (self , * args , ** kwargs )
230+
227231 # the first compilation needs to have dynamic shapes marked
228232 if len (self .compiled_codes ) < 1 :
229233 sig = inspect .signature (self .__class__ .forward )
@@ -307,7 +311,11 @@ def patched_inline_call(parent, func, args, kwargs):
307311 ** dynamo_config_patches
308312 ), maybe_use_cudagraph_partition_wrapper (
309313 self .vllm_config ), _torch27_patch_tensor_subclasses ():
310- output = self .compiled_callable (* args , ** kwargs )
314+ if envs .VLLM_USE_AOT_COMPILE :
315+ self .aot_compiled_fn = self .aot_compile (* args , ** kwargs )
316+ output = self .aot_compiled_fn (self , * args , ** kwargs )
317+ else :
318+ output = self .compiled_callable (* args , ** kwargs )
311319 return output
312320
313321 # usually, capturing the model once is enough, and then we can
0 commit comments