Skip to content

Commit db99b6e

Browse files
committed
feat: support ds v3.2 encoding
1 parent 3c34246 commit db99b6e

File tree

4 files changed

+474
-0
lines changed

4 files changed

+474
-0
lines changed

rtp_llm/models/deepseek_v2.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,3 +714,4 @@ def get_weight_cls():
714714
register_model("deepseek-v3-mtp", DeepSeekV3Mtp, ["DeepseekV3ForCausalLMNextN"])
715715
register_model("kimi_k2", DeepSeekV2, [])
716716
register_model("deepseek_v31", DeepSeekV2, [])
717+
register_model("deepseek_v32", DeepSeekV2, []) # DeepSeek-V3.2 uses same architecture as V3.1, with encoding script update

rtp_llm/openai/renderers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .chatglm4_renderer import ChatGlm4Renderer
44
from .chatglm45_renderer import ChatGlm45Renderer
55
from .deepseekv31_renderer import DeepseekV31Renderer
6+
from .deepseekv32_renderer import DeepseekV32Renderer
67
from .internvl_renderer import InternVLRenderer
78
from .kimik2_renderer import KimiK2Renderer
89
from .llava_renderer import LlavaRenderer
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
import importlib.util
2+
import logging
3+
import os
4+
import sys
5+
from typing import Optional
6+
7+
from typing_extensions import override
8+
9+
from rtp_llm.frontend.tokenizer_factory.tokenizers import BaseTokenizer
10+
from rtp_llm.openai.api_datatype import ChatCompletionRequest
11+
from rtp_llm.openai.renderer_factory_register import register_renderer
12+
from rtp_llm.openai.renderers.custom_renderer import RenderedInputs, RendererParams
13+
from rtp_llm.openai.renderers.reasoning_tool_base_renderer import (
14+
ReasoningToolBaseRenderer,
15+
)
16+
from rtp_llm.openai.renderers.sglang_helpers.function_call.base_format_detector import (
17+
BaseFormatDetector,
18+
)
19+
from rtp_llm.openai.renderers.sglang_helpers.reasoning_parser import ReasoningParser
20+
21+
22+
class DeepseekV32Renderer(ReasoningToolBaseRenderer):
    """DeepSeek V3.2 Renderer.

    This renderer uses a dedicated Python encoding script instead of Jinja
    templates. The encoding script is loaded from the checkpoint's "encode"
    folder.

    Key features:
        1. Loads encoding_dsv32.py from the checkpoint/encode folder
        2. Uses the script's ``encode_messages`` function for rendering
        3. Supports thinking mode and tool calls
    """

    def __init__(
        self,
        tokenizer: BaseTokenizer,
        renderer_params: RendererParams,
    ):
        """Initialize the renderer.

        Args:
            tokenizer: Tokenizer used to encode the rendered prompt.
            renderer_params: Renderer parameters; ``ckpt_path`` must point to a
                checkpoint directory containing ``encode/encoding_dsv32.py``.
        """
        # Load the encoding module before calling super().__init__() so that
        # any base-class hooks (e.g. _setup_chat_template) run with it ready.
        self.encoding_module = self._load_encoding_module(renderer_params.ckpt_path)
        super().__init__(tokenizer, renderer_params)

    def _load_encoding_module(self, ckpt_path: str):
        """Load the encoding_dsv32.py module from the checkpoint's encode folder.

        Args:
            ckpt_path: Path to the checkpoint directory

        Returns:
            The loaded encoding module

        Raises:
            FileNotFoundError: If the encoding script is not found
            ImportError: If the encoding script cannot be loaded
        """
        encode_folder = os.path.join(ckpt_path, "encode")
        encoding_script_path = os.path.join(encode_folder, "encoding_dsv32.py")

        if not os.path.exists(encoding_script_path):
            raise FileNotFoundError(
                f"DeepSeek V3.2 encoding script not found at {encoding_script_path}. "
                f"Please ensure the checkpoint includes the 'encode' folder with encoding_dsv32.py"
            )

        try:
            spec = importlib.util.spec_from_file_location(
                "encoding_dsv32", encoding_script_path
            )
            if spec is None or spec.loader is None:
                raise ImportError(f"Failed to load spec from {encoding_script_path}")

            module = importlib.util.module_from_spec(spec)
            # Register before exec so self-referential imports inside the
            # script resolve (kept from original behavior — NOTE(review):
            # this makes the last-loaded checkpoint's module win globally).
            sys.modules["encoding_dsv32"] = module
            spec.loader.exec_module(module)

            # Lazy %-style args avoid building the string when INFO is off.
            logging.info(
                "Successfully loaded DeepSeek V3.2 encoding module from %s",
                encoding_script_path,
            )
            return module
        except Exception as e:
            # Chain with "from e" so the root cause stays in the traceback.
            raise ImportError(
                f"Failed to load DeepSeek V3.2 encoding module from {encoding_script_path}: {str(e)}"
            ) from e

    @override
    def _setup_chat_template(self):
        """DeepSeek V3.2 doesn't use Jinja templates.

        The chat_template attribute is set to None to indicate custom rendering.
        """
        self.chat_template = None

    @staticmethod
    def _convert_messages(request: ChatCompletionRequest) -> list:
        """Convert request messages to plain dicts expected by encoding_dsv32.

        Carries over role/content, plus tool_calls and reasoning_content when
        present on a message.
        """
        messages = []
        for msg in request.messages:
            message_dict = {"role": msg.role.value, "content": msg.content}

            # Add tool_calls if present (on assistant messages)
            if hasattr(msg, "tool_calls") and msg.tool_calls:
                message_dict["tool_calls"] = [
                    {
                        "type": "function",
                        "id": tc.id,
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments,
                        },
                    }
                    for tc in msg.tool_calls
                ]

            # Add reasoning_content if present
            if hasattr(msg, "reasoning_content") and msg.reasoning_content:
                message_dict["reasoning_content"] = msg.reasoning_content

            messages.append(message_dict)
        return messages

    @staticmethod
    def _attach_tools(messages: list, request: ChatCompletionRequest) -> None:
        """Attach request-level tools to the first system message, in place.

        According to the encoding_dsv32 format, tools must be attached to a
        system message; if none exists, an empty one is prepended.
        """
        if not request.tools:
            return

        tools_data = [
            {
                "type": "function",
                "function": {
                    "name": tool.function.name,
                    "description": tool.function.description,
                    "parameters": tool.function.parameters,
                },
            }
            for tool in request.tools
        ]

        for msg in messages:
            if msg["role"] == "system":
                msg["tools"] = tools_data
                return

        # No system message exists — create one carrying the tools.
        messages.insert(0, {"role": "system", "content": "", "tools": tools_data})

    def _build_encode_config(
        self, request: ChatCompletionRequest, thinking_mode: str
    ) -> dict:
        """Assemble the keyword config passed to encode_messages.

        Defaults:
            drop_thinking=True: remove reasoning_content from historical
                assistant messages.
            add_default_bos_token=True: always add BOS since we encode the
                full message history.
        Request-supplied chat_template_kwargs (top-level, then extra_configs)
        override the defaults.
        """
        encode_config = {
            "thinking_mode": thinking_mode,
            "drop_thinking": True,
            "add_default_bos_token": True,
        }

        # Note: a "context" parameter is not used since RTP-LLM always
        # provides the full message history.
        if request.chat_template_kwargs:
            encode_config.update(request.chat_template_kwargs)

        if (
            request.extra_configs
            and request.extra_configs.chat_template_kwargs
            and isinstance(request.extra_configs.chat_template_kwargs, dict)
        ):
            encode_config.update(request.extra_configs.chat_template_kwargs)

        return encode_config

    def _build_prompt(self, request: ChatCompletionRequest) -> str:
        """Build prompt string using the DeepSeek V3.2 encoding script.

        Args:
            request: Chat completion request

        Returns:
            str: Rendered prompt string

        Raises:
            ValueError: If the encoding script fails to render the messages.
        """
        messages = self._convert_messages(request)
        self._attach_tools(messages, request)

        thinking_mode = "thinking" if self.in_think_mode(request) else "chat"
        encode_config = self._build_encode_config(request, thinking_mode)

        try:
            rendered_prompt = self.encoding_module.encode_messages(
                messages, **encode_config
            )

            logging.debug(
                "DeepSeek V3.2 rendered prompt (thinking_mode=%s): %.200s...",
                thinking_mode,
                rendered_prompt,
            )

            return rendered_prompt
        except Exception as e:
            logging.error(f"Failed to render DeepSeek V3.2 prompt: {str(e)}")
            # Chain so the encoding script's own traceback is preserved.
            raise ValueError(f"Error rendering DeepSeek V3.2 prompt: {str(e)}") from e

    @override
    def render_chat(self, request: ChatCompletionRequest) -> RenderedInputs:
        """Render chat messages using the DeepSeek V3.2 encoding script.

        Args:
            request: Chat completion request

        Returns:
            RenderedInputs with encoded token IDs and rendered prompt
        """
        prompt = self._build_prompt(request)
        input_ids = self.tokenizer.encode(prompt)
        return RenderedInputs(input_ids=input_ids, rendered_prompt=prompt)

    @override
    def _create_detector(
        self, _request: ChatCompletionRequest
    ) -> Optional[BaseFormatDetector]:
        """DeepSeek V3.2 uses a custom DSML format for tool calls.

        For now, we don't use a detector; tool parsing is handled by the
        encoding module.
        """
        return None

    @override
    def _create_reasoning_parser(
        self, request: ChatCompletionRequest
    ) -> Optional[ReasoningParser]:
        """Create a reasoning parser if the request is in thinking mode.

        Args:
            request: Chat completion request

        Returns:
            ReasoningParser if thinking mode is enabled and the rendered
            prompt opens a <think> section, None otherwise
        """
        if not self.in_think_mode(request):
            return None

        try:
            # NOTE(review): this re-renders the full prompt just to probe for
            # the <think> marker — consider caching the render_chat result.
            rendered_result = self.render_chat(request)
            if "<think>" in rendered_result.rendered_prompt:
                return ReasoningParser(model_type="deepseek-v3", force_reasoning=True)
        except Exception as e:
            # Best-effort probe: log instead of swallowing silently, then
            # fall through to "no parser" as before.
            logging.debug("DeepSeek V3.2 reasoning-parser probe failed: %s", e)
            return None

        return None
249+
# Make the renderer discoverable by the factory under the "deepseek_v32"
# model type (matches the register_model entry in deepseek_v2.py).
register_renderer("deepseek_v32", DeepseekV32Renderer)

0 commit comments

Comments
 (0)