
Commit a51f418

ZJY0516 and ywang96 authored
[Bugfix] fix dots.llm1.inst (#29687)
Signed-off-by: zjy0516 <riverclouds.zhu@qq.com> Co-authored-by: Roger Wang <hey@rogerw.io>
1 parent: 7675ba3

File tree

1 file changed: +6 -5 lines changed


vllm/model_executor/models/dots1.py

Lines changed: 6 additions & 5 deletions
@@ -181,13 +181,14 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         hidden_states = hidden_states.view(-1, hidden_dim)
 
         router_logits, _ = self.gate(hidden_states)
-        final_hidden_states = (
-            self.experts(hidden_states=hidden_states, router_logits=router_logits)
-            * self.routed_scaling_factor
-        )
 
+        shared_out, routed_out = self.experts(
+            hidden_states=hidden_states, router_logits=router_logits
+        )
         if self.shared_experts is not None:
-            final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
+            final_hidden_states = (routed_out + shared_out) * self.routed_scaling_factor
+        else:
+            final_hidden_states = routed_out * self.routed_scaling_factor
 
         if self.tp_size > 1:
             final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
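The behavioral change can be sketched as a small standalone helper (`combine_expert_outputs` is a hypothetical name for illustration, not a vLLM API): `self.experts` now returns separate shared and routed outputs, and `routed_scaling_factor` is applied once, after the shared-expert contribution (if any) is added, rather than to the raw experts output.

```python
def combine_expert_outputs(routed_out, shared_out, routed_scaling_factor):
    """Combine routed- and shared-expert outputs as in the fixed dots1 forward.

    Hypothetical standalone sketch: works on torch tensors or plain numbers,
    since it only uses + and *.
    """
    if shared_out is not None:
        # Shared-expert path: add first, then scale the combined output once.
        return (routed_out + shared_out) * routed_scaling_factor
    # No shared experts configured: scale the routed output alone.
    return routed_out * routed_scaling_factor
```

For example, with `routed_out=2.0`, `shared_out=1.0`, and a scaling factor of `0.5`, the helper returns `1.5`; with no shared experts it returns `1.0`.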

0 commit comments
