From 8dfa16fa15e570c1b7891fd2021d4f26faadf8ed Mon Sep 17 00:00:00 2001 From: zhxchen17 Date: Wed, 22 Oct 2025 07:49:20 -0700 Subject: [PATCH] [compile] Add fallback path to AOT compile when serialization fails. Summary: Fixing issue https://github.com/vllm-project/vllm/issues/27348 For dynamo caching, it's possible that the compilation succeeds but the serialization step fails. In this case, the failure of the serialization step shouldn't block the user from getting the compilation results correctly. Therefore we add handling of the serialization error and only give a warning when model saving fails. When saving fails, the vLLM model runner should be able to just fall back to the old path, and in the next process, it will fail to load the dynamo cache but still fall back to retracing with dynamo + loading the inductor cache, which is the same behavior as with AOT compile turned off. This is mostly a short-term fix, and in the long term we should resolve the serialization bugs by eliminating pickling of graph modules, i.e., once https://github.com/vllm-project/vllm/pull/25205 is merged, we should be able to resolve the issue at a lower level. 
Test Plan: pytest tests/lora/test_quant_model.py Reviewers: Subscribers: Tasks: Tags: Signed-off-by: zhxchen17 --- vllm/compilation/decorators.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 4a4903035cf9..f9b344b84d73 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -402,8 +402,17 @@ def patched_inline_call(self_): output = self.aot_compiled_fn(self, *args, **kwargs) assert aot_compilation_path is not None assert cache_dir is not None - os.makedirs(cache_dir, exist_ok=True) - self.aot_compiled_fn.save_compiled_function(aot_compilation_path) + try: + os.makedirs(cache_dir, exist_ok=True) + self.aot_compiled_fn.save_compiled_function( + aot_compilation_path + ) + except Exception as e: + logger.warning( + "Cannot save aot compilation to path %s, error: %s", + aot_compilation_path, + str(e), + ) else: output = self.compiled_callable(*args, **kwargs) return output