Skip to content

Commit b968266

Browse files
committed
moved allreduce_strategy to BaseLlmArgs
Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
1 parent 9b2f3f1 commit b968266

File tree

2 files changed

+12
-25
lines changed

2 files changed

+12
-25
lines changed

tensorrt_llm/_torch/auto_deploy/llm_args.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -123,25 +123,6 @@ class AutoDeployConfig(DynamicYamlMixInForSettings, BaseSettings):
123123

124124
device: str = Field(default="cuda", description="The device to use for the model.", frozen=True)
125125

126-
allreduce_strategy: Literal[
127-
"AUTO",
128-
"NCCL",
129-
"ONESHOT",
130-
"TWOSHOT",
131-
"MIN_LATENCY",
132-
"LOWPRECISION",
133-
"UB",
134-
"MNNVL",
135-
"NCCL_SYMMETRIC",
136-
] = Field(
137-
default="AUTO",
138-
description="AllReduce strategy for distributed inference. Options: AUTO (automatic selection), "
139-
"NCCL (NCCL-based), ONESHOT (single-phase fusion kernel), TWOSHOT (two-phase fusion kernel), "
140-
"MIN_LATENCY (minimum latency heuristic), LOWPRECISION (low precision allreduce), "
141-
"UB (unified buffer), MNNVL (multi-node NVLINK), NCCL_SYMMETRIC (NCCL symmetric). "
142-
"AUTO is recommended for most use cases.",
143-
)
144-
145126
# TODO: see if we can just remove this field and use kv_cache_config.dtype instead?
146127
kv_cache_dtype: str = Field(
147128
default="auto",

tensorrt_llm/llmapi/llm_args.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,6 +1577,18 @@ class BaseLlmArgs(StrictBaseModel):
15771577
default=None,
15781578
description="The expert parallel size for MoE models' expert weights.")
15791579

1580+
allreduce_strategy: Optional[Literal[
1581+
'AUTO', 'NCCL', 'UB', 'MINLATENCY', 'ONESHOT', 'TWOSHOT',
1582+
'LOWPRECISION', 'MNNVL', 'NCCL_SYMMETRIC']] = Field(
1583+
default='AUTO',
1584+
description=
1585+
"AllReduce strategy for distributed inference. Options: AUTO (automatic selection), "
1586+
"NCCL (NCCL-based), ONESHOT (single-phase fusion kernel), TWOSHOT (two-phase fusion kernel), "
1587+
"MINLATENCY (minimum latency heuristic), LOWPRECISION (low precision allreduce), "
1588+
"UB (unified buffer), MNNVL (multi-node NVLINK), NCCL_SYMMETRIC (NCCL symmetric). "
1589+
"AUTO is recommended for most use cases.",
1590+
status="beta")
1591+
15801592
enable_attention_dp: bool = Field(
15811593
default=False,
15821594
description="Enable attention data parallel.",
@@ -2531,12 +2543,6 @@ class TorchLlmArgs(BaseLlmArgs):
25312543
status="prototype",
25322544
)
25332545

2534-
allreduce_strategy: Optional[Literal[
2535-
'AUTO', 'NCCL', 'UB', 'MINLATENCY', 'ONESHOT', 'TWOSHOT',
2536-
'LOWPRECISION', 'MNNVL',
2537-
'NCCL_SYMMETRIC']] = Field(default='AUTO',
2538-
description="Allreduce strategy to use.",
2539-
status="beta")
25402546
checkpoint_loader: Optional[object] = Field(
25412547
default=None,
25422548
description=

0 commit comments

Comments
 (0)