@@ -1577,6 +1577,18 @@ class BaseLlmArgs(StrictBaseModel):
15771577 default = None ,
15781578 description = "The expert parallel size for MoE models's expert weights." )
15791579
1580+ allreduce_strategy : Optional [Literal [
1581+ 'AUTO' , 'NCCL' , 'UB' , 'MINLATENCY' , 'ONESHOT' , 'TWOSHOT' ,
1582+ 'LOWPRECISION' , 'MNNVL' , 'NCCL_SYMMETRIC' ]] = Field (
1583+ default = 'AUTO' ,
1584+ description =
1585+ "AllReduce strategy for distributed inference. Options: AUTO (automatic selection), "
1586+ "NCCL (NCCL-based), ONESHOT (single-phase fusion kernel), TWOSHOT (two-phase fusion kernel), "
1587+ "MINLATENCY (minimum latency heuristic), LOWPRECISION (low precision allreduce), "
1588+ "UB (unified buffer), MNNVL (multi-node NVLINK), NCCL_SYMMETRIC (NCCL symmetric). "
1589+ "AUTO is recommended for most use cases." ,
1590+ status = "beta" )
1591+
15801592 enable_attention_dp : bool = Field (
15811593 default = False ,
15821594 description = "Enable attention data parallel." ,
@@ -2531,12 +2543,6 @@ class TorchLlmArgs(BaseLlmArgs):
25312543 status = "prototype" ,
25322544 )
25332545
2534- allreduce_strategy : Optional [Literal [
2535- 'AUTO' , 'NCCL' , 'UB' , 'MINLATENCY' , 'ONESHOT' , 'TWOSHOT' ,
2536- 'LOWPRECISION' , 'MNNVL' ,
2537- 'NCCL_SYMMETRIC' ]] = Field (default = 'AUTO' ,
2538- description = "Allreduce strategy to use." ,
2539- status = "beta" )
25402546 checkpoint_loader : Optional [object ] = Field (
25412547 default = None ,
25422548 description =
0 commit comments