61 changes: 57 additions & 4 deletions tinker_cookbook/renderers.py
@@ -474,14 +474,31 @@ class DeepSeekV3Renderer(Renderer):
     For no-think, just use <|Assistant|></think>
     """
 
+    def _split_system_prompt(self, messages: list[Message]) -> tuple[str | None, list[Message]]:
+        """
+        DeepSeek v3 only supports a single system message, placed immediately after BOS.
+        """
+        system_prompt: str | None = None
+        filtered_messages: list[Message] = []
+        for idx, message in enumerate(messages):
+            if message["role"] == "system":
+                if idx == 0 and system_prompt is None:
+                    system_prompt = message["content"]
+                    continue
+                raise ValueError(
+                    "DeepSeek v3 only supports a single system message as the first message in the conversation."
+                )
+            filtered_messages.append(message)
+        return system_prompt, filtered_messages
+
     def _render_message(self, message: Message) -> tuple[list[int], list[int], list[int]]:
         assert message.get("thinking") is None, "TODO: support CoT in DsV3 renderer"
         if message["role"] == "user":
             role_token = self._get_special_token("User")
         elif message["role"] == "assistant":
             role_token = self._get_special_token("Assistant")
         else:
-            raise ValueError(f"Unsuppoerted role: {message['role']}")
+            raise ValueError(f"Unsupported role: {message['role']}")
         ob = [role_token]
         ac = self.tokenizer.encode(message["content"], add_special_tokens=False)
 
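For intuition, the new helper behaves like this (illustrative sketch; `renderer` stands in for a constructed `DeepSeekV3Renderer`, and `Message` is treated as a plain dict with `role`/`content` keys, which matches how the renderer indexes it):

```python
messages = [
    {"role": "system", "content": "Be concise."},
    {"role": "user", "content": "Hi"},
]
system_prompt, rest = renderer._split_system_prompt(messages)
# system_prompt == "Be concise."
# rest == [{"role": "user", "content": "Hi"}]
# A second system message, or a system message that is not the
# first message in the list, raises ValueError instead.
```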
@@ -494,9 +511,12 @@ def _render_message(self, message: Message) -> tuple[list[int], list[int], list[
     def build_generation_prompt(
         self, messages: list[Message], role: Role = "assistant", prefill: str | None = None
     ) -> tinker.ModelInput:
+        system_prompt, non_system_messages = self._split_system_prompt(messages)
         tokens: list[int] = []
         tokens.extend(self._bos_tokens)
-        for message in messages:
+        if system_prompt:
+            tokens.extend(self.tokenizer.encode(system_prompt, add_special_tokens=False))
+        for message in non_system_messages:
             ob_part, action_part, action_tail = self._render_message(message)
             tokens.extend(ob_part)
             tokens.extend(action_part)
@@ -514,10 +534,16 @@ def build_supervised_example(
         """
        Get tokens and weights for action corresponding to final message
         """
+        system_prompt, non_system_messages = self._split_system_prompt(messages)
+        if not non_system_messages:
+            raise ValueError("DeepSeek v3 requires at least one non-system message in the conversation.")
+        start_tokens = list(self._bos_tokens)
+        if system_prompt:
+            start_tokens.extend(self.tokenizer.encode(system_prompt, add_special_tokens=False))
         return build_supervised_example(
-            self._bos_tokens,
+            start_tokens,
             lambda _idx, message: self._render_message(message),
-            messages,
+            non_system_messages,
             train_on_what,
         )
 
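The net effect is that a leading system message is now encoded directly after BOS, before any role tokens; previously any system message fell through to the `Unsupported role` ValueError in `_render_message`. Schematically (the special-token spelling is assumed from the class docstring, so treat this as illustrative rather than literal tokenizer output):

```
<BOS>{system prompt}<|User|>{user content}<|Assistant|>
```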
@@ -564,6 +590,31 @@ def build_generation_prompt(
         return super().build_generation_prompt(messages, role, prefill)
 
 
+class DeepSeekV3ForceThinkingRenderer(DeepSeekV3Renderer):
+    """
+    Renderer that forces inclusion of a thinking block for DsV3 models.
+    """
+
+    def _render_message(self, message: Message) -> tuple[list[int], list[int], list[int]]:
[Collaborator Author] I copied this from the NoThinking renderer. Not sure exactly why it's here.

+        if message["role"] == "assistant":
+            content = message["content"]
+            new_content = content
+            if new_content.startswith("</think>"):
+                new_content = new_content[len("</think>") :]
+            if not new_content.startswith("<think>"):
+                new_content = "<think>" + new_content
[@opherlieber (Contributor), Nov 10, 2025] Do we want to support/use the Message's 'thinking' field here, like gpt-oss? If we do want to also support <think> directly in the content, should we enforce here that there is also a </think> followed by an actual message after it? And I'm not sure whether, for multi-message interactions, the thinking tokens should be rendered in all previous assistant messages, or be kept only in the last message as in gpt-oss.

[Collaborator Author] Good questions. Haven't thought these things through yet. No rush to merge this one -- still experimenting with it in my use case.

[Collaborator Author] Re: the thinking field, we might as well prioritize supporting the most advanced tool-use systems being released, which would include gpt-oss and kimi-k2-thinking; both use interleaved CoT + tool calls. So I'm in favor of whatever improves support for those models.

[Collaborator Author] I'm not sure what the right policy is for possibly hiding CoT -- in gpt-oss, I assume we include all the thinking traces from the current turn (i.e., back until the last user message), in cases where we have multiple assistant messages including tool calls?

[Contributor] Yes, in gpt-oss we include thinking tokens from the current turn, even if there are multiple assistant channel messages.
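A sketch of the retention policy described above, if one wanted to apply it here (hypothetical helper, not part of this diff; it assumes `Message` behaves like a dict with an optional `thinking` key):

```python
def strip_stale_thinking(messages: list[Message]) -> list[Message]:
    # Index of the last user message; -1 if there is none.
    last_user = max(
        (i for i, m in enumerate(messages) if m["role"] == "user"),
        default=-1,
    )
    out: list[Message] = []
    for i, m in enumerate(messages):
        # Assistant messages from earlier turns lose their CoT;
        # those after the last user message (the current turn) keep it.
        if m["role"] == "assistant" and i < last_user and m.get("thinking") is not None:
            m = {k: v for k, v in m.items() if k != "thinking"}
        out.append(m)
    return out
```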

+            if new_content != content:
+                message = message.copy()
+                message["content"] = new_content
+        return super()._render_message(message)
+
+    def build_generation_prompt(
+        self, messages: list[Message], role: Role = "assistant", prefill: str | None = None
+    ) -> tinker.ModelInput:
+        prefill = "<think>" + (prefill or "")
+        return super().build_generation_prompt(messages, role, prefill)
+
+
 class GptOssRenderer(Renderer):
     """
     Format like this (no newlines between messages, last message should end with <|return|> but be replaced by <|end|> when continuing the convo):
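For reference, the normalization in the new `_render_message` maps assistant contents as follows (worked examples only):

```python
# "</think>Paris"  -> "<think>Paris"   stray closing tag stripped, opening tag forced
# "Paris"          -> "<think>Paris"   opening tag forced
# "<think>Paris"   -> "<think>Paris"   already compliant, message left untouched
```

And since `build_generation_prompt` prepends `<think>` to the prefill, sampling always begins inside a thinking block.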
@@ -717,6 +768,8 @@ def get_renderer(name: str, tokenizer: Tokenizer) -> Renderer:
         return DeepSeekV3Renderer(tokenizer)
     elif name == "deepseekv3_disable_thinking":
         return DeepSeekV3DisableThinkingRenderer(tokenizer)
+    elif name == "deepseekv3_force_thinking":
+        return DeepSeekV3ForceThinkingRenderer(tokenizer)
     elif name == "gpt_oss_no_sysprompt":
         return GptOssRenderer(tokenizer, use_system_prompt=False)
     elif name == "gpt_oss_low_reasoning":