Skip to content

Commit b2d2462

Browse files
frost-intel and Copilot authored
Fix ProcessGroupXCCL options initialization (#2203)
The group UID needs to be set before the log prefix is created, and the order of the initializer list is now fixed. In addition, we remove the redefinition of certain members in ProcessGroupXCCL.hpp, which allows the existing Python bindings to work properly. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent f6eaac8 commit b2d2462

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

src/xccl/ProcessGroupXCCL.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -369,15 +369,15 @@ ProcessGroupXCCL::ProcessGroupXCCL(
369369
int size,
370370
c10::intrusive_ptr<Options> options)
371371
: Backend(rank, size),
372-
store_(store),
373-
options_(std::move(options)),
372+
store_(std::move(store)),
374373
xcclCommCounter_(0),
375-
local_id_(process_group_id++) {
374+
local_id_(process_group_id++),
375+
options_(std::move(options)) {
376+
this->setGroupUid(options_->group_name);
376377
logPrefix_ = createLogPrefix();
377378
blockingWait_ = getCvarBool(TORCH_XCCL_BLOCKING_WAIT, false);
378379
traceBufferSize_ = getCvarInt({"TORCH_FR_BUFFER_SIZE"}, 2000);
379380

380-
this->setGroupUid(options_->group_name);
381381
// In PGNCCL, the pg ranks are recorded on comm setup in each op, but we just
382382
// do it here.
383383
const auto XcclVersion = getXcclVersion();

src/xccl/ProcessGroupXCCL.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,6 @@ class TORCH_API ProcessGroupXCCL : public Backend {
128128
return c10::make_intrusive<Options>(is_high_priority_stream);
129129
}
130130
bool is_high_priority_stream;
131-
std::vector<uint64_t> global_ranks_in_group;
132-
std::string group_name;
133131
};
134132

135133
ProcessGroupXCCL(

0 commit comments

Comments
 (0)