Skip to content

Commit dd9b017

Browse files
committed
[None][feat] Add TRTLLM_NIXL_KVCACHE_BACKEND environment variable for NIXL backend selection
Allows users to specify which NIXL backend to use for KV cache transmission. Usage: TRTLLM_NIXL_KVCACHE_BACKEND=<backend>. Currently only the UCX backend is supported; unsupported backend types fall back to the default (UCX). Signed-off-by: Yoray Zack <62789610+zackyoray@users.noreply.github.com>
1 parent 53491ff commit dd9b017

File tree

3 files changed

+40
-3
lines changed

3 files changed

+40
-3
lines changed

cpp/tensorrt_llm/common/envUtils.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,28 @@ std::string getEnvNixlInterface()
318318
return nixlInterface;
319319
}
320320

321+
// Returns the name of the NIXL backend selected through the TRTLLM_NIXL_KVCACHE_BACKEND
// environment variable.
//
// The variable is read once, on the first call; the result is cached in a function-local static
// and returned (by value) on every later call, so changes to the environment after the first call
// are not observed. When the variable is unset or set to an empty string, "UCX" is returned.
// The value is otherwise returned verbatim: no validation against a list of known backends is
// performed here.
std::string getEnvNixlBackend()
{
    static std::once_flag flag;
    static std::string nixlBackend;

    std::call_once(flag,
        [&]()
        {
            char const* envValue = std::getenv("TRTLLM_NIXL_KVCACHE_BACKEND");
            // Treat an exported-but-empty variable the same as an unset one, so that
            // `TRTLLM_NIXL_KVCACHE_BACKEND=` does not hand an empty backend name to callers.
            bool const hasValue = envValue != nullptr && envValue[0] != '\0';
            // Default to UCX if not specified
            nixlBackend = hasValue ? envValue : "UCX";
        });
    return nixlBackend;
}
342+
321343
bool getEnvDisaggLayerwise()
322344
{
323345
static bool const disaggLayerwise = getBoolEnv("TRTLLM_DISAGG_LAYERWISE");

cpp/tensorrt_llm/common/envUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ std::string getEnvUCXInterface();
8888

8989
std::string getEnvNixlInterface();
9090

91+
// Returns the NIXL backend name taken from the TRTLLM_NIXL_KVCACHE_BACKEND environment variable,
// or "UCX" when that variable is not set. The lookup is performed once and the result is cached.
std::string getEnvNixlBackend();
92+
9193
bool getEnvDisaggLayerwise();
9294

9395
bool getEnvParallelCacheSend();

cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/transferAgent.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <netdb.h>
2929
#include <netinet/in.h>
3030
#include <nixl_types.h>
31+
#include <set>
3132
#include <sys/file.h>
3233
#include <sys/stat.h>
3334
#include <unistd.h>
@@ -345,15 +346,27 @@ NixlTransferAgent::NixlTransferAgent(BaseAgentConfig const& config)
345346
mRawAgent = std::make_unique<nixlAgent>(config.mName, std::move(nixlConfig));
346347
}
347348

349+
std::string nixlBackend = common::getEnvNixlBackend();
350+
// List of supported backends - extend this list as new backends are added
351+
static const std::set<std::string> kSUPPORTED_BACKENDS = {"UCX"};
352+
353+
if (kSUPPORTED_BACKENDS.find(nixlBackend) == kSUPPORTED_BACKENDS.end())
354+
{
355+
TLLM_LOG_ERROR("Unsupported NIXL backend: %s, fallback to UCX", nixlBackend.c_str());
356+
nixlBackend = "UCX";
357+
}
358+
359+
TLLM_LOG_INFO("NixlTransferAgent::NixlTransferAgent using NIXL backend: %s", nixlBackend.c_str());
360+
348361
nixl_b_params_t init1;
349362
nixl_mem_list_t mems1;
350-
status = mRawAgent->getPluginParams("UCX", mems1, init1);
363+
status = mRawAgent->getPluginParams(nixlBackend.c_str(), mems1, init1);
351364
TLLM_CHECK(status == NIXL_SUCCESS);
352365

353-
status = mRawAgent->createBackend("UCX", init1, mRawBackend);
366+
status = mRawAgent->createBackend(nixlBackend.c_str(), init1, mRawBackend);
354367
if (status != NIXL_SUCCESS || !mRawBackend)
355368
{
356-
TLLM_THROW("Failed to create NIXL backend");
369+
TLLM_THROW("Failed to create NIXL backend: %s", nixlBackend.c_str());
357370
}
358371
mExtraParams.backends.push_back(mRawBackend);
359372
TLLM_LOG_INFO("NixlTransferAgent::NixlTransferAgent mAddress: %s", mAddress.c_str());

0 commit comments

Comments
 (0)