Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions comms/ncclx/v2_27/meta/colltrace/tests/CollTraceDistTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1201,15 +1201,15 @@ TEST_F(CollTraceTest, winPutWait) {
int prevPeer = (this->globalRank + this->numRanks - 1) % this->numRanks;

for (auto iter = 0; iter < kNumIters; iter++) {
NCCLCHECK_TEST(ncclPutSignal(
NCCLCHECK_TEST(ncclPutSignal_old(
localbuf + kNumElements * statex->rank(),
kNumElements,
ncclInt32,
nextPeer,
kNumElements * statex->rank(),
win,
put_stream));
NCCLCHECK_TEST(ncclWaitSignal(prevPeer, win, wait_stream));
NCCLCHECK_TEST(ncclWaitSignal_old(prevPeer, win, wait_stream));
if (iter == 0) {
CUDACHECK_TEST(cudaEventRecord(start_event, put_stream));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ TEST_F(CollTraceWatchdogTest, TestAsyncErrorFromGPE) {
auto srcRank = (rank - 1 + worldSize) % worldSize;
auto dstRank = (rank + 1) % worldSize;

NCCLCHECK_FATAL(
ncclPutSignal(sendBuff, 32, ncclFloat, dstRank, 0, win, stream.raw()));
NCCLCHECK_FATAL(ncclWaitSignal(srcRank, win, stream.raw()));
NCCLCHECK_FATAL(ncclPutSignal_old(
sendBuff, 32, ncclFloat, dstRank, 0, win, stream.raw()));
NCCLCHECK_FATAL(ncclWaitSignal_old(srcRank, win, stream.raw()));
waitStreamWithTimeout(stream.raw(), std::chrono::seconds{20});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -622,15 +622,15 @@ TEST_F(CollTraceTest, winPutWait) {
int prevPeer = (this->globalRank + this->numRanks - 1) % this->numRanks;

for (auto iter = 0; iter < kNumIters; iter++) {
NCCLCHECK_TEST(ncclPutSignal(
NCCLCHECK_TEST(ncclPutSignal_old(
localbuf + kNumElements * statex->rank(),
kNumElements,
ncclInt32,
nextPeer,
kNumElements * statex->rank(),
win,
put_stream));
NCCLCHECK_TEST(ncclWaitSignal(prevPeer, win, wait_stream));
NCCLCHECK_TEST(ncclWaitSignal_old(prevPeer, win, wait_stream));
}

int errs = 0;
Expand Down
22 changes: 12 additions & 10 deletions comms/ncclx/v2_27/meta/rma/rma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@

NCCL_API(
ncclResult_t,
ncclPutSignal,
ncclPutSignal_old,
const void* origin_buff,
size_t count,
ncclDataType_t datatype,
int peer,
size_t target_disp,
ncclWin_t win,
cudaStream_t stream);
ncclResult_t ncclPutSignal(
ncclResult_t ncclPutSignal_old(
const void* origin_buff,
size_t count,
ncclDataType_t datatype,
Expand All @@ -29,7 +29,8 @@ ncclResult_t ncclPutSignal(
cudaStream_t stream) {
auto comm = win->comm->ctranComm_.get();
if (!ctranInitialized(comm)) {
FB_ERRORRETURN(ncclInternalError, "ncclPutSignal requires Ctran support");
FB_ERRORRETURN(
ncclInternalError, "ncclPutSignal_old requires Ctran support");
}
return metaCommToNccl(ctranPutSignal(
origin_buff,
Expand Down Expand Up @@ -111,21 +112,22 @@ ncclResult_t ncclGet(

NCCL_API(
ncclResult_t,
ncclWaitSignal,
ncclWaitSignal_old,
int peer,
ncclWin_t win,
cudaStream_t stream);
ncclResult_t ncclWaitSignal(int peer, ncclWin_t win, cudaStream_t stream) {
ncclResult_t ncclWaitSignal_old(int peer, ncclWin_t win, cudaStream_t stream) {
auto comm = win->comm->ctranComm_.get();
if (!ctranInitialized(comm)) {
FB_ERRORRETURN(ncclInternalError, "ncclWaitSignal requires Ctran support");
FB_ERRORRETURN(
ncclInternalError, "ncclWaitSignal_old requires Ctran support");
}
return metaCommToNccl(ctranWaitSignal(peer, win->ctranWindow, comm, stream));
}

NCCL_API(
ncclResult_t,
ncclPutSignal_v2,
ncclPutSignal,
const void* origin_buff,
size_t target_disp,
size_t count,
Expand All @@ -135,7 +137,7 @@ NCCL_API(
int peer,
ncclWin_t win,
cudaStream_t stream);
ncclResult_t ncclPutSignal_v2(
ncclResult_t ncclPutSignal(
const void* origin_buff,
size_t target_disp,
size_t count,
Expand Down Expand Up @@ -164,13 +166,13 @@ ncclResult_t ncclPutSignal_v2(

NCCL_API(
ncclResult_t,
ncclWaitSignal_v2,
ncclWaitSignal,
size_t signal_disp,
uint64_t cmp_val,
ncclCmpOp_t cmp_op,
ncclWin_t win,
cudaStream_t stream);
ncclResult_t ncclWaitSignal_v2(
ncclResult_t ncclWaitSignal(
size_t signal_disp,
uint64_t cmp_val,
ncclCmpOp_t cmp_op,
Expand Down
8 changes: 4 additions & 4 deletions comms/ncclx/v2_27/meta/rma/tests/RMATest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,15 @@ TEST_P(MultiWindowTestParam, multiWindow) {
int prevPeer = (this->globalRank + this->numRanks - 1) % this->numRanks;

for (auto iter = 0; iter < kNumIters; iter++) {
NCCLCHECK_TEST(ncclPutSignal(
NCCLCHECK_TEST(ncclPutSignal_old(
localbuf + numElements * statex->rank(),
numElements,
ncclInt32,
nextPeer,
numElements * statex->rank(),
win,
put_stream));
NCCLCHECK_TEST(ncclWaitSignal(prevPeer, win, wait_stream));
NCCLCHECK_TEST(ncclWaitSignal_old(prevPeer, win, wait_stream));
}
// Barrier to ensure all peers have finished put
this->barrier(comm, main_stream);
Expand Down Expand Up @@ -238,15 +238,15 @@ TEST_P(RMATestParam, winPutWait) {
int prevPeer = (this->globalRank + this->numRanks - 1) % this->numRanks;

for (auto iter = 0; iter < kNumIters; iter++) {
NCCLCHECK_TEST(ncclPutSignal(
NCCLCHECK_TEST(ncclPutSignal_old(
localBuf,
kNumElements,
ncclInt32,
nextPeer,
kNumElements * statex->rank(),
win,
put_stream));
NCCLCHECK_TEST(ncclWaitSignal(prevPeer, win, wait_stream));
NCCLCHECK_TEST(ncclWaitSignal_old(prevPeer, win, wait_stream));
if (iter == 0) {
// Skip first iteration to avoid any warmup overhead
CUDACHECK_TEST(cudaEventRecord(start_event, put_stream));
Expand Down
12 changes: 6 additions & 6 deletions comms/ncclx/v2_27/src/nccl.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -689,23 +689,23 @@ ncclResult_t pncclWinFree(ncclComm_t comm, ncclWin_t win);
* One-side put operation from a local buffer to a remote peer's pre-allocated
* and registered buffer within a NCCL window.
*/
ncclResult_t ncclPutSignal(
ncclResult_t ncclPutSignal_old(
const void* originBuff,
size_t count,
ncclDataType_t datatype,
int peer,
size_t targetDisp,
ncclWin_t win,
cudaStream_t stream);
ncclResult_t pncclPutSignal(
ncclResult_t pncclPutSignal_old(
const void* originBuff,
size_t count,
ncclDataType_t datatype,
int peer,
size_t targetDisp,
ncclWin_t win,
cudaStream_t stream);
ncclResult_t ncclPutSignal_v2(
ncclResult_t ncclPutSignal(
const void* originBuff,
size_t targetDisp,
size_t count,
Expand Down Expand Up @@ -745,13 +745,13 @@ ncclResult_t ncclPutSignal_v2(
/*
* Wait for a signal from remote peer to complete the put operation.
*/
ncclResult_t ncclWaitSignal(int peer, ncclWin_t win, cudaStream_t stream);
ncclResult_t pncclWaitSignal(int peer, ncclWin_t win, cudaStream_t stream);
ncclResult_t ncclWaitSignal_old(int peer, ncclWin_t win, cudaStream_t stream);
ncclResult_t pncclWaitSignal_old(int peer, ncclWin_t win, cudaStream_t stream);

/*
* Wait for a signal given the local signal displacement, the signal value, and the comparison op.
*/
ncclResult_t ncclWaitSignal_v2(
ncclResult_t ncclWaitSignal(
size_t signalDisp,
uint64_t cmpVal,
ncclCmpOp_t cmpOp,
Expand Down
2 changes: 1 addition & 1 deletion comms/torchcomms/ncclx/NcclxApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ ncclResult_t DefaultNcclxApi::winWaitSignal(
NcclxWindow win,
cudaStream_t stream) {
#ifdef NCCL_RMA_SUPPORTED
return ncclWaitSignal_v2(signal_disp, cmp_val, cmp_op, win, stream);
return ncclWaitSignal(signal_disp, cmp_val, cmp_op, win, stream);
#else
throw std::logic_error(
"NCCL does not support window, NCCL_RMA_SUPPORTED is not set");
Expand Down
Loading