Skip to content

Commit 446912d

Browse files
wangln19 and DarkLight1337
authored
fix: allow HuggingFace standard chat template params via **kwargs (vllm-project#27622)
Signed-off-by: wangln19 <wanglinian@dev.wanglinian.msh-dev.svc.cluster.local>
Signed-off-by: wangln19 <96399074+wangln19@users.noreply.github.com>
Co-authored-by: wangln19 <wanglinian@dev.wanglinian.msh-dev.svc.cluster.local>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
1 parent a00d625 commit 446912d

File tree

2 files changed

+57
-1
lines changed

2 files changed

+57
-1
lines changed

tests/entrypoints/test_chat_utils.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1882,6 +1882,39 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
18821882
)
18831883
assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
18841884

1885+
# Additional test: Verify HF base parameters work with **kwargs tokenizers
1886+
# This validates the fix for tokenizers like Kimi K2 that use **kwargs
1887+
# to receive standard HuggingFace parameters instead of declaring them explicitly
1888+
from vllm.entrypoints.chat_utils import _get_hf_base_chat_template_params
1889+
1890+
hf_base_params = _get_hf_base_chat_template_params()
1891+
# Verify common HF parameters are in the base class
1892+
assert {"add_generation_prompt", "tools", "continue_final_message"}.issubset(
1893+
hf_base_params
1894+
), f"Expected HF base params not found in {hf_base_params}"
1895+
1896+
# Test with a mock tokenizer that uses **kwargs (like Kimi K2)
1897+
class MockTokenizerWithKwargs:
1898+
def apply_chat_template(self, conversation, **kwargs):
1899+
return "mocked_output"
1900+
1901+
mock_tokenizer = MockTokenizerWithKwargs()
1902+
mock_kwargs = {
1903+
"add_generation_prompt": True,
1904+
"tools": tools,
1905+
"continue_final_message": False,
1906+
"unknown_param": "should_be_filtered",
1907+
}
1908+
resolved_mock = resolve_chat_template_kwargs(
1909+
mock_tokenizer, chat_template, mock_kwargs, raise_on_unexpected=False
1910+
)
1911+
# HF base params should pass through even with **kwargs tokenizer
1912+
assert "add_generation_prompt" in resolved_mock
1913+
assert "tools" in resolved_mock
1914+
assert "continue_final_message" in resolved_mock
1915+
# Unknown params should be filtered out
1916+
assert "unknown_param" not in resolved_mock
1917+
18851918

18861919
# NOTE: Qwen2-Audio default chat template is specially defined inside
18871920
# processor class instead of using `tokenizer_config.json`

vllm/entrypoints/chat_utils.py

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

44
import asyncio
5+
import inspect
56
import json
67
from abc import ABC, abstractmethod
78
from collections import Counter, defaultdict, deque
@@ -1515,6 +1516,24 @@ def _resolve_chat_template_kwargs(
15151516
_cached_resolve_chat_template_kwargs = lru_cache(_resolve_chat_template_kwargs)
15161517

15171518

1519+
@lru_cache
1520+
def _get_hf_base_chat_template_params() -> frozenset[str]:
1521+
# Get standard parameters from HuggingFace's base tokenizer class.
1522+
# This dynamically extracts parameters from PreTrainedTokenizer's
1523+
# apply_chat_template method, ensuring compatibility with tokenizers
1524+
# that use **kwargs to receive standard parameters.
1525+
1526+
# Read signature from HF's base class - the single source of truth
1527+
base_sig = inspect.signature(PreTrainedTokenizer.apply_chat_template)
1528+
# Exclude VAR_KEYWORD (**kwargs) and VAR_POSITIONAL (*args) placeholders
1529+
return frozenset(
1530+
p.name
1531+
for p in base_sig.parameters.values()
1532+
if p.kind
1533+
not in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
1534+
)
1535+
1536+
15181537
def resolve_chat_template_kwargs(
15191538
tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
15201539
chat_template: str,
@@ -1538,7 +1557,11 @@ def resolve_chat_template_kwargs(
15381557
if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
15391558
}
15401559
template_vars = _cached_resolve_chat_template_kwargs(chat_template)
1541-
accept_vars = (fn_kw | template_vars) - unexpected_vars
1560+
1561+
# Allow standard HF parameters even if tokenizer uses **kwargs to receive them
1562+
hf_base_params = _get_hf_base_chat_template_params()
1563+
1564+
accept_vars = (fn_kw | template_vars | hf_base_params) - unexpected_vars
15421565
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
15431566

15441567

0 commit comments

Comments
 (0)