Skip to content

Commit 12b8ad0

Browse files
committed
bugfix
1 parent 6e40359 commit 12b8ad0

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

include/flashinfer/attention/hopper/epilogue.cuh

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ struct CollectiveEpilogue {
168168
/*id=*/static_cast<int>(NamedBarriers::kValueEmpty));
169169
cute::copy(smem_tiled_copy_O, tOrO_retile, tOsO);
170170
cutlass::arch::fence_view_async_shared(); // ensure smem writes are visible to TMA
171-
cutlass::arch::NamedBarrier::arrive(NUM_MMA_THREADS + Ktraits::NUM_PRODUCER_THREADS,
171+
cutlass::arch::NamedBarrier::arrive(NUM_MMA_THREADS,
172172
cutlass::arch::ReservedNamedBarriers::EpilogueBarrier);
173173

174174
Tensor mLSE = make_tensor(make_gmem_ptr(epilogue_params.lse_ptr), epilogue_params.layout_LSE);
@@ -194,11 +194,10 @@ struct CollectiveEpilogue {
194194
}
195195
}
196196

197+
cutlass::arch::NamedBarrier::sync(NUM_MMA_THREADS,
198+
cutlass::arch::ReservedNamedBarriers::EpilogueBarrier);
199+
197200
int write_warp_idx = NUM_WARPS - 1;
198-
if (cutlass::canonical_warp_idx_sync() == write_warp_idx) {
199-
cutlass::arch::NamedBarrier::sync(NUM_MMA_THREADS + Ktraits::NUM_PRODUCER_THREADS,
200-
cutlass::arch::ReservedNamedBarriers::EpilogueBarrier);
201-
}
202201
TiledCopyO gmem_tiled_copy_O;
203202
write_O<NUM_COPY_THREADS>(epilogue_params.O_ptr, gmem_tiled_copy_O, epilogue_params.layout_O,
204203
select<0, 1>(TileShape_PDV{}), sO, thread_idx, qo_tile_idx,

0 commit comments

Comments
 (0)