Skip to content

Commit a0e222f

Browse files
authored
[SimplifyCFG] Simplify uncond br with icmp & select (#165580)
Previously, SimplifyCFG only simplified unconditional branches when they met a pattern (`switch` -> `icmp` -> `br` -> `phi`) as follows: ```LLVM switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] DEFAULT: %tmp = icmp eq i8 %A, 92 br label %end end: ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] ``` This PR supports a new and more generic pattern (`switch` -> `icmp` -> `select` -> `br` -> `phi`) to simplify unconditional branches as follows: ```LLVM ; BEFORE case1: switch i32 %x, label %DEFAULT [ i32 0, label %end i32 1, label %case2 ] case2: br label %end DEFAULT: %tmp = icmp eq i32 %x, 2 %val = select i1 %tmp, i32 V3, i32 V4 br label %end end: ... = phi i32 [ V1, %case1 ], [ V2, %case2 ], [ %val, %DEFAULT ] ``` We prefer to split the edge to 'end' so that there are TWO entries of V3/V4 to the PHI, merging the icmp & select into the switch, as follows: ```LLVM ; AFTER case1: switch i32 %x, label %DEFAULT [ i32 0, label %end i32 1, label %case2 i32 2, label %case3 ] case2: br label %end case3: br label %end DEFAULT: br label %end end: ... = phi i32 [ V1, %case1 ], [ V2, %case2 ], [ V3, %case3 ], [ V4, %DEFAULT] ``` Alive2 Proof: https://alive2.llvm.org/ce/z/jYHM4f Promising Optimization Impact: dtcxzyw/llvm-opt-benchmark#3006
1 parent ffb5831 commit a0e222f

File tree

4 files changed

+245
-27
lines changed

4 files changed

+245
-27
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 111 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,9 @@ class SimplifyCFGOpt {
302302

303303
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
304304
IRBuilder<> &Builder);
305-
305+
bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
306+
SelectInst *Select,
307+
IRBuilder<> &Builder);
306308
bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
307309
bool hoistSuccIdenticalTerminatorToSwitchOrIf(
308310
Instruction *TI, Instruction *I1,
@@ -5023,25 +5025,102 @@ bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
50235025
/// the PHI, merging the third icmp into the switch.
50245026
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
50255027
ICmpInst *ICI, IRBuilder<> &Builder) {
5028+
// Select == nullptr means we assume that there is a hidden no-op select
5029+
// instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5030+
return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5031+
}
5032+
5033+
/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5034+
/// case. This is called when we find an icmp instruction (a seteq/setne with a
5035+
/// constant) and its following select instruction as the only TWO instructions
5036+
/// in a block that ends with an uncond branch. We are looking for a very
5037+
/// specific pattern that occurs when "
5038+
/// if (A == 0) return C1;
5039+
/// if (A == 1) return C2;
5040+
/// if (A < 3) return C3;
5041+
/// return C4;
5042+
/// " gets simplified. In this case, we merge the first two "branches of icmp"
5043+
/// into a switch, but then the default value goes to an uncond block with a lt
5044+
/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5045+
/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5046+
/// get something like:
5047+
///
5048+
/// case1:
5049+
/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5050+
/// case2:
5051+
/// br label %end
5052+
/// DEFAULT:
5053+
/// %tmp = icmp eq i8 %A, 2
5054+
/// %val = select i1 %tmp, i8 C3, i8 C4
5055+
/// br label %end
5056+
/// end:
5057+
/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5058+
///
5059+
/// We prefer to split the edge to 'end' so that there are TWO entries of C3/C4
5060+
/// to the PHI, merging the icmp & select into the switch, as follows:
5061+
///
5062+
/// case1:
5063+
/// switch i8 %A, label %DEFAULT [
5064+
/// i8 0, label %end
5065+
/// i8 1, label %case2
5066+
/// i8 2, label %case3
5067+
/// ]
5068+
/// case2:
5069+
/// br label %end
5070+
/// case3:
5071+
/// br label %end
5072+
/// DEFAULT:
5073+
/// br label %end
5074+
/// end:
5075+
/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case3 ], [ C4, %DEFAULT ]
5076+
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5077+
ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
50265078
BasicBlock *BB = ICI->getParent();
50275079

5028-
// If the block has any PHIs in it or the icmp has multiple uses, it is too
5029-
// complex.
5030-
if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5080+
// If the block has any PHIs in it or the icmp/select has multiple uses, it is
5081+
// too complex.
5082+
// TODO: support multi-phis in succ BB of select's BB.
5083+
if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
5084+
(Select && !Select->hasOneUse()))
50315085
return false;
50325086

5033-
Value *V = ICI->getOperand(0);
5034-
ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5035-
50365087
// The pattern we're looking for is where our only predecessor is a switch on
50375088
// 'V' and this block is the default case for the switch. In this case we can
50385089
// fold the compared value into the switch to simplify things.
50395090
BasicBlock *Pred = BB->getSinglePredecessor();
50405091
if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
50415092
return false;
50425093

5094+
Value *IcmpCond;
5095+
ConstantInt *NewCaseVal;
5096+
CmpPredicate Predicate;
5097+
5098+
// Match icmp X, C
5099+
if (!match(ICI,
5100+
m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
5101+
return false;
5102+
5103+
Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
5104+
Instruction *User;
5105+
if (!Select) {
5106+
// If Select == nullptr, we can assume that there is a hidden no-op select
5107+
// just after icmp
5108+
SelectCond = ICI;
5109+
SelectTrueVal = Builder.getTrue();
5110+
SelectFalseVal = Builder.getFalse();
5111+
User = ICI->user_back();
5112+
} else {
5113+
SelectCond = Select->getCondition();
5114+
// Check if the select condition is the same as the icmp condition.
5115+
if (SelectCond != ICI)
5116+
return false;
5117+
SelectTrueVal = Select->getTrueValue();
5118+
SelectFalseVal = Select->getFalseValue();
5119+
User = Select->user_back();
5120+
}
5121+
50435122
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5044-
if (SI->getCondition() != V)
5123+
if (SI->getCondition() != IcmpCond)
50455124
return false;
50465125

50475126
// If BB is reachable on a non-default case, then we simply know the value of
@@ -5063,9 +5142,9 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
50635142
// Ok, the block is reachable from the default dest. If the constant we're
50645143
// comparing exists in one of the other edges, then we can constant fold ICI
50655144
// and zap it.
5066-
if (SI->findCaseValue(Cst) != SI->case_default()) {
5145+
if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
50675146
Value *V;
5068-
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5147+
if (Predicate == ICmpInst::ICMP_EQ)
50695148
V = ConstantInt::getFalse(BB->getContext());
50705149
else
50715150
V = ConstantInt::getTrue(BB->getContext());
@@ -5076,25 +5155,30 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
50765155
return requestResimplify();
50775156
}
50785157

5079-
// The use of the icmp has to be in the 'end' block, by the only PHI node in
5158+
// The use of the select has to be in the 'end' block, by the only PHI node in
50805159
// the block.
50815160
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5082-
PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5161+
PHINode *PHIUse = dyn_cast<PHINode>(User);
50835162
if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
50845163
isa<PHINode>(++BasicBlock::iterator(PHIUse)))
50855164
return false;
50865165

5087-
// If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5088-
// true in the PHI.
5089-
Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5090-
Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5166+
// If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5167+
// edge gets SelectTrueVal in the PHI.
5168+
Value *DefaultCst = SelectFalseVal;
5169+
Value *NewCst = SelectTrueVal;
50915170

5092-
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5171+
if (ICI->getPredicate() == ICmpInst::ICMP_NE)
50935172
std::swap(DefaultCst, NewCst);
50945173

5095-
// Replace ICI (which is used by the PHI for the default value) with true or
5096-
// false depending on if it is EQ or NE.
5097-
ICI->replaceAllUsesWith(DefaultCst);
5174+
// Replace Select (which is used by the PHI for the default value) with
5175+
// SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5176+
if (Select) {
5177+
Select->replaceAllUsesWith(DefaultCst);
5178+
Select->eraseFromParent();
5179+
} else {
5180+
ICI->replaceAllUsesWith(DefaultCst);
5181+
}
50985182
ICI->eraseFromParent();
50995183

51005184
SmallVector<DominatorTree::UpdateType, 2> Updates;
@@ -5111,7 +5195,7 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
51115195
NewW = ((uint64_t(*W0) + 1) >> 1);
51125196
SIW.setSuccessorWeight(0, *NewW);
51135197
}
5114-
SIW.addCase(Cst, NewBB, NewW);
5198+
SIW.addCase(NewCaseVal, NewBB, NewW);
51155199
if (DTU)
51165200
Updates.push_back({DominatorTree::Insert, Pred, NewBB});
51175201
}
@@ -8302,13 +8386,18 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
83028386

83038387
// If the only instruction in the block is a seteq/setne comparison against a
83048388
// constant, try to simplify the block.
8305-
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
8389+
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
83068390
if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
83078391
++I;
83088392
if (I->isTerminator() &&
83098393
tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
83108394
return true;
8395+
if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8396+
tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8397+
Builder))
8398+
return true;
83118399
}
8400+
}
83128401

83138402
// See if we can merge an empty landing pad block with another which is
83148403
// equivalent.

llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @c1, ptr @c2, ptr @c3]
1616
; ENABLE: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @g1, ptr @g2, ptr @g3]
1717
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @g1, ptr @g2, ptr @g3]
18-
; ENABLE: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @f1, ptr @f2, ptr @f3]
19-
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @f1, ptr @f2, ptr @f3]
18+
; ENABLE: @{{.*}} = private unnamed_addr constant [4 x ptr] [ptr @f1, ptr @f2, ptr @f3, ptr @f4]
19+
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [4 x ptr] [ptr @f1, ptr @f2, ptr @f3, ptr @f4]
2020

2121
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
2222
target triple = "armv7a--none-eabi"

llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -410,13 +410,12 @@ define i1 @single_value_with_mask(i32 %x) {
410410
; OPTNOLUT-NEXT: i32 21, label %[[END]]
411411
; OPTNOLUT-NEXT: i32 48, label %[[END]]
412412
; OPTNOLUT-NEXT: i32 16, label %[[END]]
413+
; OPTNOLUT-NEXT: i32 80, label %[[END]]
413414
; OPTNOLUT-NEXT: ]
414415
; OPTNOLUT: [[DEFAULT]]:
415-
; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80
416-
; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true
417416
; OPTNOLUT-NEXT: br label %[[END]]
418417
; OPTNOLUT: [[END]]:
419-
; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ]
418+
; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ true, %[[DEFAULT]] ], [ false, %[[ENTRY]] ]
420419
; OPTNOLUT-NEXT: ret i1 [[RES]]
421420
;
422421
; TTINOLUT-LABEL: define i1 @single_value_with_mask(

llvm/test/Transforms/SimplifyCFG/switch_create.ll

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,136 @@ if.end:
13141314
ret void
13151315
}
13161316

1317+
define i32 @switch_with_icmp_select_after_it(i32 %x) {
1318+
; CHECK-LABEL: @switch_with_icmp_select_after_it(
1319+
; CHECK-NEXT: entry:
1320+
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
1321+
; CHECK-NEXT: i32 18, label [[END:%.*]]
1322+
; CHECK-NEXT: i32 21, label [[END]]
1323+
; CHECK-NEXT: i32 48, label [[END]]
1324+
; CHECK-NEXT: i32 16, label [[END]]
1325+
; CHECK-NEXT: i32 80, label [[SWITCH_EDGE:%.*]]
1326+
; CHECK-NEXT: ]
1327+
; CHECK: switch.edge:
1328+
; CHECK-NEXT: br label [[END]]
1329+
; CHECK: default:
1330+
; CHECK-NEXT: br label [[END]]
1331+
; CHECK: end:
1332+
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 3, [[DEFAULT]] ], [ 2, [[SWITCH_EDGE]] ]
1333+
; CHECK-NEXT: ret i32 [[RES]]
1334+
;
1335+
entry:
1336+
switch i32 %x, label %default [
1337+
i32 18, label %end
1338+
i32 21, label %end
1339+
i32 48, label %end
1340+
i32 16, label %end
1341+
]
1342+
default:
1343+
%cmp = icmp eq i32 %x, 80
1344+
; Create a new switch case BB for case 80.
1345+
%sel = select i1 %cmp, i32 2, i32 3
1346+
br label %end
1347+
end:
1348+
%res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
1349+
ret i32 %res
1350+
}
1351+
1352+
define i32 @switch_with_icmp_select_after_it2(i32 %x) {
1353+
; CHECK-LABEL: @switch_with_icmp_select_after_it2(
1354+
; CHECK-NEXT: entry:
1355+
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
1356+
; CHECK-NEXT: i32 18, label [[END:%.*]]
1357+
; CHECK-NEXT: i32 21, label [[END]]
1358+
; CHECK-NEXT: i32 48, label [[END]]
1359+
; CHECK-NEXT: i32 16, label [[END]]
1360+
; CHECK-NEXT: i32 80, label [[END]]
1361+
; CHECK-NEXT: ]
1362+
; CHECK: default:
1363+
; CHECK-NEXT: br label [[END]]
1364+
; CHECK: end:
1365+
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 3, [[DEFAULT]] ], [ 1, [[ENTRY]] ]
1366+
; CHECK-NEXT: ret i32 [[RES]]
1367+
;
1368+
entry:
1369+
switch i32 %x, label %default [
1370+
i32 18, label %end
1371+
i32 21, label %end
1372+
i32 48, label %end
1373+
i32 16, label %end
1374+
]
1375+
default:
1376+
%cmp = icmp eq i32 %x, 80
1377+
; Should not create new case BB
1378+
%sel = select i1 %cmp, i32 1, i32 3
1379+
br label %end
1380+
end:
1381+
%res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
1382+
ret i32 %res
1383+
}
1384+
1385+
define i32 @switch_with_icmp_select_after_it3(i32 %x) {
1386+
; CHECK-LABEL: @switch_with_icmp_select_after_it3(
1387+
; CHECK-NEXT: entry:
1388+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 80
1389+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 3, i32 1
1390+
; CHECK-NEXT: ret i32 [[SEL]]
1391+
;
1392+
entry:
1393+
switch i32 %x, label %default [
1394+
i32 18, label %end
1395+
i32 21, label %end
1396+
i32 48, label %end
1397+
i32 16, label %end
1398+
]
1399+
default:
1400+
%cmp = icmp eq i32 %x, 80
1401+
; Should not create new case BB
1402+
%sel = select i1 %cmp, i32 3, i32 1
1403+
br label %end
1404+
end:
1405+
%res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
1406+
ret i32 %res
1407+
}
1408+
1409+
; TODO: support this case (multi-phis).
1410+
define i32 @switch_with_icmp_select_after_it_multi_phis(i32 %x) {
1411+
; CHECK-LABEL: @switch_with_icmp_select_after_it_multi_phis(
1412+
; CHECK-NEXT: entry:
1413+
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
1414+
; CHECK-NEXT: i32 18, label [[END:%.*]]
1415+
; CHECK-NEXT: i32 21, label [[END]]
1416+
; CHECK-NEXT: i32 48, label [[END]]
1417+
; CHECK-NEXT: i32 16, label [[END]]
1418+
; CHECK-NEXT: ]
1419+
; CHECK: default:
1420+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80
1421+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2, i32 3
1422+
; CHECK-NEXT: br label [[END]]
1423+
; CHECK: end:
1424+
; CHECK-NEXT: [[RES1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY]] ], [ 0, [[ENTRY]] ], [ 0, [[ENTRY]] ], [ 100, [[DEFAULT]] ]
1425+
; CHECK-NEXT: [[RES2:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ [[SEL]], [[DEFAULT]] ]
1426+
; CHECK-NEXT: [[RES:%.*]] = xor i32 [[RES1]], [[RES2]]
1427+
; CHECK-NEXT: ret i32 [[RES]]
1428+
;
1429+
entry:
1430+
switch i32 %x, label %default [
1431+
i32 18, label %end
1432+
i32 21, label %end
1433+
i32 48, label %end
1434+
i32 16, label %end
1435+
]
1436+
default:
1437+
%cmp = icmp eq i32 %x, 80
1438+
%sel = select i1 %cmp, i32 2, i32 3
1439+
br label %end
1440+
end:
1441+
%res1 = phi i32 [ 0, %entry ], [ 0, %entry ], [ 0, %entry ], [ 0, %entry ], [ 100, %default ]
1442+
%res2 = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
1443+
%res = xor i32 %res1, %res2
1444+
ret i32 %res
1445+
}
1446+
13171447
!0 = !{!"function_entry_count", i32 100}
13181448
!1 = !{!"branch_weights", i32 6, i32 10}
13191449
;.

0 commit comments

Comments
 (0)