Skip to content

Commit e7dec23

Browse files
authored
[ReplaceConstant] Don't create instructions for the same constant multiple times in the same basic block (#169141)
Fixes #167500.
1 parent b3428bb commit e7dec23

File tree

4 files changed

+140
-63
lines changed

4 files changed

+140
-63
lines changed

llvm/lib/IR/ReplaceConstant.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ static bool isExpandableUser(User *U) {
2222
return isa<ConstantExpr>(U) || isa<ConstantAggregate>(U);
2323
}
2424

25-
static SmallVector<Instruction *, 4> expandUser(BasicBlock::iterator InsertPt,
26-
Constant *C) {
27-
SmallVector<Instruction *, 4> NewInsts;
25+
static void expandUser(BasicBlock::iterator InsertPt, Constant *C,
26+
SmallVector<Instruction *, 4> &NewInsts) {
27+
NewInsts.clear();
2828
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
2929
Instruction *ConstInst = CE->getAsInstruction();
3030
ConstInst->insertBefore(*InsertPt->getParent(), InsertPt);
@@ -46,7 +46,6 @@ static SmallVector<Instruction *, 4> expandUser(BasicBlock::iterator InsertPt,
4646
} else {
4747
llvm_unreachable("Not an expandable user");
4848
}
49-
return NewInsts;
5049
}
5150

5251
bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
@@ -91,6 +90,11 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
9190

9291
// Replace those expandable operands with instructions
9392
bool Changed = false;
93+
// We need to cache the instructions we've already expanded to avoid expanding
94+
// the same constant multiple times in the same basic block, which is
95+
// problematic when the same constant is used in a phi node multiple times for
// the same predecessor block: all such entries must carry the same value.
96+
DenseMap<std::pair<Constant *, BasicBlock *>, SmallVector<Instruction *, 4>>
97+
ConstantToInstructionMap;
9498
while (!InstructionWorklist.empty()) {
9599
Instruction *I = InstructionWorklist.pop_back_val();
96100
DebugLoc Loc = I->getDebugLoc();
@@ -105,7 +109,14 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
105109
if (auto *C = dyn_cast<Constant>(U.get())) {
106110
if (ExpandableUsers.contains(C)) {
107111
Changed = true;
108-
auto NewInsts = expandUser(BI, C);
112+
SmallVector<Instruction *, 4> &NewInsts =
113+
ConstantToInstructionMap[std::make_pair(C, BI->getParent())];
114+
// If the cached instruction is after the insertion point, we need to
115+
// create a new one. We can't simply move the cached instruction
116+
// because its operands (also expanded instructions) might not
117+
// dominate the new position.
118+
if (NewInsts.empty() || BI->comesBefore(NewInsts.front()))
119+
expandUser(BI, C, NewInsts);
109120
for (auto *NI : NewInsts)
110121
NI->setDebugLoc(Loc);
111122
InstructionWorklist.insert_range(NewInsts);

llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414

1515
; Use constant from different kernels
1616
;.
17-
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t poison, align 2
18-
; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 2
19-
; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 4
20-
; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16
21-
; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 2
22-
; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t poison, align 16
23-
; CHECK: @llvm.amdgcn.kernel.k6.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k6.lds.t poison, align 16
17+
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t poison, align 2, !absolute_symbol !0
18+
; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 2, !absolute_symbol !0
19+
; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 4, !absolute_symbol !0
20+
; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16, !absolute_symbol !0
21+
; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 2, !absolute_symbol !0
22+
; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t poison, align 16, !absolute_symbol !0
23+
; CHECK: @llvm.amdgcn.kernel.k6.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k6.lds.t poison, align 16, !absolute_symbol !0
2424
;.
2525
define amdgpu_kernel void @k0(i64 %x) {
2626
; CHECK-LABEL: @k0(
@@ -67,7 +67,7 @@ define amdgpu_kernel void @k3(i64 %x) {
6767
; CHECK-LABEL: @k3(
6868
; CHECK-NEXT: %1 = getelementptr inbounds [32 x i8], ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, i32 0, i32 16
6969
; CHECK-NEXT: %ptr1 = addrspacecast ptr addrspace(3) %1 to ptr
70-
; CHECK-NEXT: store i64 1, ptr %ptr1, align 1
70+
; CHECK-NEXT: store i64 1, ptr %ptr1, align 16
7171
; CHECK-NEXT: %2 = getelementptr inbounds [32 x i8], ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, i32 0, i32 24
7272
; CHECK-NEXT: %ptr2 = addrspacecast ptr addrspace(3) %2 to ptr
7373
; CHECK-NEXT: store i64 2, ptr %ptr2, align 8
@@ -98,9 +98,9 @@ define amdgpu_kernel void @k4(i64 %x) {
9898
; Multiple uses of the same constexpr in a single instruction.
9999
define amdgpu_kernel void @k5() {
100100
; CHECK-LABEL: @k5(
101-
; CHECK-NEXT: %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k5.lds to ptr
102-
; CHECK-NEXT: %2 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k5.lds to ptr
103-
; CHECK-NEXT: call void poison(ptr %1, ptr %2)
101+
; CHECK-NEXT: %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k5.lds to ptr
102+
; CHECK-NEXT: call void poison(ptr %1, ptr %1)
103+
; CHECK-NEXT: ret void
104104
;
105105
call void poison(ptr addrspacecast (ptr addrspace(3) @lds.4 to ptr), ptr addrspacecast (ptr addrspace(3) @lds.4 to ptr))
106106
ret void
@@ -113,13 +113,22 @@ define amdgpu_kernel void @k5() {
113113
; expression operands of store should be replaced by equivalent instruction sequences.
114114
define amdgpu_kernel void @k6() {
115115
; CHECK-LABEL: @k6(
116-
117-
; CHECK-NEXT: %1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
118-
; CHECK-NEXT: %2 = ptrtoint ptr addrspace(3) %1 to i32
119-
; CHECK-NEXT: %3 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
120-
; CHECK-NEXT: store i32 %2, ptr addrspace(3) %3, align 8
121-
; CHECK-NEXT: ret void
116+
; CHECK-NEXT: %1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
117+
; CHECK-NEXT: %2 = ptrtoint ptr addrspace(3) %1 to i32
118+
; CHECK-NEXT: %3 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
119+
; CHECK-NEXT: store i32 %2, ptr addrspace(3) %3, align 8
120+
; CHECK-NEXT: ret void
122121
;
122+
123123
store i32 ptrtoint (ptr addrspace(3) getelementptr inbounds ([4 x i32], ptr addrspace(3) @lds.5, i32 0, i32 2) to i32), ptr addrspace(3) getelementptr inbounds ([4 x i32], ptr addrspace(3) @lds.5, i32 0, i32 2)
124124
ret void
125125
}
126+
;.
127+
; CHECK: attributes #0 = { "amdgpu-lds-size"="2" }
128+
; CHECK: attributes #1 = { "amdgpu-lds-size"="4" }
129+
; CHECK: attributes #2 = { "amdgpu-lds-size"="32" }
130+
; CHECK: attributes #3 = { "amdgpu-lds-size"="2020" }
131+
; CHECK: attributes #4 = { "amdgpu-lds-size"="16" }
132+
;.
133+
; CHECK: !0 = !{i32 0, i32 1}
134+
;.

llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
12
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
23
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
34

@@ -9,73 +10,78 @@
910
@kern = addrspace(3) global float poison, align 4
1011

1112
; @a_func is only used from a non-kernel function so is rewritten
12-
; CHECK-NOT: @a_func
1313
; @b_both is used from a non-kernel function so is rewritten
14-
; CHECK-NOT: @b_both
1514
; sorted both < func, so @b_both at null and @a_func at 4
1615
@b_both = addrspace(3) global float poison, align 4
1716

18-
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4
19-
; CHECK: @llvm.amdgcn.kernel.timestwo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.timestwo.lds.t poison, align 4
2017

21-
; CHECK-LABEL: @get_func()
22-
; CHECK: %0 = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
23-
; CHECK: %1 = ptrtoint ptr %0 to i64
24-
; CHECK: %2 = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
25-
; CHECK: %3 = ptrtoint ptr %2 to i64
26-
; CHECK: %4 = add i64 %1, %3
27-
; CHECK: %5 = inttoptr i64 %4 to ptr
28-
; CHECK: %6 = load i32, ptr %5, align 4
29-
; CHECK: ret i32 %6
3018
define i32 @get_func() local_unnamed_addr #0 {
19+
; CHECK-LABEL: define i32 @get_func() local_unnamed_addr {
20+
; CHECK-NEXT: [[ENTRY:.*:]]
21+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
22+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
23+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[TMP1]]
24+
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
25+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
26+
; CHECK-NEXT: ret i32 [[TMP4]]
27+
;
3128
entry:
3229
%0 = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @a_func to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @a_func to ptr) to i64)) to ptr), align 4
3330
ret i32 %0
3431
}
3532

36-
; CHECK-LABEL: @set_func(i32 %x)
37-
; CHECK: %0 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
38-
; CHECK: %1 = ptrtoint ptr %0 to i64
39-
; CHECK: %2 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
40-
; CHECK: %3 = ptrtoint ptr %2 to i64
41-
; CHECK: %4 = add i64 %1, %3
42-
; CHECK: %5 = inttoptr i64 %4 to ptr
43-
; CHECK: store i32 %x, ptr %5, align 4
44-
; CHECK: ret void
4533
define void @set_func(i32 %x) {
34+
; CHECK-LABEL: define void @set_func(
35+
; CHECK-SAME: i32 [[X:%.*]]) {
36+
; CHECK-NEXT: [[ENTRY:.*:]]
37+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
38+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
39+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[TMP1]]
40+
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
41+
; CHECK-NEXT: store i32 [[X]], ptr [[TMP3]], align 4
42+
; CHECK-NEXT: ret void
43+
;
4644
entry:
4745
store i32 %x, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64)) to ptr), align 4
4846
ret void
4947
}
5048

51-
; CHECK-LABEL: @timestwo() #0
52-
; CHECK-NOT: call void @llvm.donothing()
5349

54-
; CHECK: %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
55-
; CHECK: %2 = ptrtoint ptr %1 to i64
56-
; CHECK: %3 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
57-
; CHECK: %4 = ptrtoint ptr %3 to i64
58-
; CHECK: %5 = add i64 %2, %4
59-
; CHECK: %6 = inttoptr i64 %5 to ptr
60-
; CHECK: %ld = load i32, ptr %6, align 4
61-
; CHECK: %mul = mul i32 %ld, 2
62-
; CHECK: %7 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
63-
; CHECK: %8 = ptrtoint ptr %7 to i64
64-
; CHECK: %9 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
65-
; CHECK: %10 = ptrtoint ptr %9 to i64
66-
; CHECK: %11 = add i64 %8, %10
67-
; CHECK: %12 = inttoptr i64 %11 to ptr
68-
; CHECK: store i32 %mul, ptr %12, align 4
69-
; CHECK: ret void
7050
define amdgpu_kernel void @timestwo() {
51+
; CHECK-LABEL: define amdgpu_kernel void @timestwo(
52+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
53+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
54+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
55+
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TIMESTWO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
56+
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
57+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], [[TMP4]]
58+
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
59+
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[TMP6]], align 4
60+
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 2
61+
; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TIMESTWO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
62+
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
63+
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
64+
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
65+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP8]], [[TMP10]]
66+
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
67+
; CHECK-NEXT: store i32 [[MUL]], ptr [[TMP12]], align 4
68+
; CHECK-NEXT: ret void
69+
;
7170
%ld = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64)) to ptr), align 4
7271
%mul = mul i32 %ld, 2
7372
store i32 %mul, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64)) to ptr), align 4
7473
ret void
7574
}
7675

77-
; CHECK-LABEL: @through_functions() #0
7876
define amdgpu_kernel void @through_functions() {
77+
; CHECK-LABEL: define amdgpu_kernel void @through_functions(
78+
; CHECK-SAME: ) #[[ATTR0]] {
79+
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
80+
; CHECK-NEXT: [[LD:%.*]] = call i32 @get_func()
81+
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
82+
; CHECK-NEXT: call void @set_func(i32 [[MUL]])
83+
; CHECK-NEXT: ret void
84+
;
7985
%ld = call i32 @get_func()
8086
%mul = mul i32 %ld, 4
8187
call void @set_func(i32 %mul)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-module-lds %s -o - | FileCheck %s
3+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-module-lds %s -o - | FileCheck %s
4+
5+
@lds = internal unnamed_addr addrspace(3) global [6144 x half] poison, align 2
6+
7+
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
8+
; CHECK-LABEL: define amdgpu_kernel void @test(
9+
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[ENTRY:.*]]:
11+
; CHECK-NEXT: switch i32 0, label %[[BB_3:.*]] [
12+
; CHECK-NEXT: i32 18, label %[[BB_2:.*]]
13+
; CHECK-NEXT: i32 1, label %[[BB_2]]
14+
; CHECK-NEXT: i32 0, label %[[BB_3]]
15+
; CHECK-NEXT: ]
16+
; CHECK: [[BB_1:.*]]:
17+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.test.lds to ptr
18+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
19+
; CHECK-NEXT: switch i32 0, label %[[BB_3]] [
20+
; CHECK-NEXT: i32 18, label %[[BB_2]]
21+
; CHECK-NEXT: i32 1, label %[[BB_2]]
22+
; CHECK-NEXT: i32 0, label %[[BB_3]]
23+
; CHECK-NEXT: ]
24+
; CHECK: [[BB_2]]:
25+
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP1]], %[[BB_1]] ], [ [[TMP1]], %[[BB_1]] ], [ 10, %[[ENTRY]] ], [ 10, %[[ENTRY]] ]
26+
; CHECK-NEXT: store i64 [[PHI]], ptr addrspace(1) [[OUT]], align 8
27+
; CHECK-NEXT: br label %[[BB_3]]
28+
; CHECK: [[BB_3]]:
29+
; CHECK-NEXT: ret void
30+
;
31+
entry:
32+
switch i32 0, label %bb.3 [
33+
i32 18, label %bb.2
34+
i32 1, label %bb.2
35+
i32 0, label %bb.3
36+
]
37+
bb.1:
38+
switch i32 0, label %bb.3 [
39+
i32 18, label %bb.2
40+
i32 1, label %bb.2
41+
i32 0, label %bb.3
42+
]
43+
44+
bb.2:
45+
%phi = phi i64 [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [10, %entry], [10, %entry]
46+
store i64 %phi, ptr addrspace(1) %out, align 8
47+
br label %bb.3
48+
49+
bb.3:
50+
ret void
51+
}

0 commit comments

Comments
 (0)