Skip to content

Commit ddeec11

Browse files
authored
[Bugfix][P/D] Reduce num_threads used by nixl ucx backend (#27196)
Signed-off-by: David Whyte-Gray <40244437+dagrayvid@users.noreply.github.com>
1 parent 86ed770 commit ddeec11

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -607,15 +607,25 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
607607
# TODO: temporary; once nixl supports a telemetry flag in its agent config
608608
# (next release), we can remove this env var.
609609
os.environ["NIXL_TELEMETRY_ENABLE"] = "1"
610+
610611
# Agent.
611612
non_ucx_backends = [b for b in self.nixl_backends if b != "UCX"]
613+
# Configure NIXL num_threads to avoid UAR exhaustion on Mellanox NICs.
614+
# Each UCX thread allocates UARs (doorbell pages) via DevX, and
615+
# excessive NIXL UAR usage can exhaust NIC UAR space. This can cause
616+
# components like NVSHMEM (used by DeepEP kernels) to fail during RDMA
617+
# initialization with "mlx5dv_devx_alloc_uar" errors.
618+
# Ref: https://network.nvidia.com/files/doc-2020/ethernet-adapters-programming-manual.pdf#page=63
619+
num_threads = vllm_config.kv_transfer_config.get_from_extra_config(
620+
"num_threads", 4
621+
)
612622
if nixl_agent_config is None:
613623
config = None
614624
else:
615625
config = (
616626
nixl_agent_config(backends=self.nixl_backends)
617627
if len(non_ucx_backends) > 0
618-
else nixl_agent_config(num_threads=8)
628+
else nixl_agent_config(num_threads=num_threads)
619629
)
620630

621631
self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config)

0 commit comments

Comments
 (0)