Skip to content

Commit b4d7d3f

Browse files
authored
[mlir][NVVM] Add nvvm.membar operation (#166698)
Add nvvm.membar operation with level as defined in https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar This will be used to replace direct intrinsic call in CUDA Fortran for `threadfence()`, `threadfence_block` and `thread fence_system()` currently lowered here: https://github.com/llvm/llvm-project/blob/e700f157026bf8b4d58f936c5db8f152e269d77f/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp#L1310 The nvvm membar intrsinsic are also used in CUDA C/C++ (https://github.com/llvm/llvm-project/blob/49f55f4991227f3c7a2b8161bbf45c74b7023944/clang/lib/Headers/__clang_cuda_device_functions.h#L528)
1 parent 67198d1 commit b4d7d3f

File tree

3 files changed

+45
-0
lines changed

3 files changed

+45
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,23 @@ def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">,
12361236
let hasVerifier = 1;
12371237
}
12381238

1239+
def NVVM_MembarOp : NVVM_Op<"memory.barrier">,
1240+
Arguments<(ins MemScopeKindAttr:$scope)> {
1241+
let summary = "Memory barrier operation";
1242+
let description = [{
1243+
`membar` operation guarantees that prior memory accesses requested by this
1244+
thread are performed at the specified `scope`, before later memory
1245+
operations requested by this thread following the membar instruction.
1246+
1247+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar)
1248+
}];
1249+
1250+
let assemblyFormat = "$scope attr-dict";
1251+
let llvmBuilder = [{
1252+
createIntrinsicCall(builder, getMembarIntrinsicID($scope), {});
1253+
}];
1254+
}
1255+
12391256
def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">,
12401257
Arguments<(ins MemScopeKindAttr:$scope,
12411258
DefaultValuedAttr<ProxyKindAttr,

mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
291291
llvm_unreachable("Unsupported proxy kinds");
292292
}
293293

294+
static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
295+
switch (scope) {
296+
case NVVM::MemScopeKind::CTA:
297+
return llvm::Intrinsic::nvvm_membar_cta;
298+
case NVVM::MemScopeKind::CLUSTER:
299+
return llvm::Intrinsic::nvvm_fence_sc_cluster;
300+
case NVVM::MemScopeKind::GPU:
301+
return llvm::Intrinsic::nvvm_membar_gl;
302+
case NVVM::MemScopeKind::SYS:
303+
return llvm::Intrinsic::nvvm_membar_sys;
304+
}
305+
llvm_unreachable("Unknown scope for memory barrier");
306+
}
307+
294308
#define TCGEN05LD(SHAPE, NUM) llvm::Intrinsic::nvvm_tcgen05_ld_##SHAPE##_##NUM
295309

296310
static llvm::Intrinsic::ID
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
2+
3+
// CHECK-lABEL: @memorybarrier()
4+
llvm.func @memorybarrier() {
5+
// CHECK: call void @llvm.nvvm.membar.cta()
6+
nvvm.memory.barrier #nvvm.mem_scope<cta>
7+
// CHECK: call void @llvm.nvvm.fence.sc.cluster()
8+
nvvm.memory.barrier #nvvm.mem_scope<cluster>
9+
// CHECK: call void @llvm.nvvm.membar.gl()
10+
nvvm.memory.barrier #nvvm.mem_scope<gpu>
11+
// CHECK: call void @llvm.nvvm.membar.sys()
12+
nvvm.memory.barrier #nvvm.mem_scope<sys>
13+
llvm.return
14+
}

0 commit comments

Comments
 (0)