Skip to content

Commit d3f487a

Browse files
committed
[SCEV] Collect guard info for ICMP NE w/o constants. (llvm#160500)
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b if neither operand is a constant. This improves results in some cases, and will be even more useful together with llvm#160012 and llvm#159942. Alive2: https://alive2.llvm.org/ce/z/YyBvoT. PR: llvm#160500 (cherry picked from commit 2d02726)
1 parent 2af416b commit d3f487a

File tree

4 files changed

+135
-10
lines changed

4 files changed

+135
-10
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15695,6 +15695,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1569515695
const SCEV *OneAlignedUp =
1569615696
GetNextSCEVDividesByDivisor(One, DividesBy);
1569715697
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
15698+
} else {
15699+
if (LHS->getType()->isPointerTy()) {
15700+
LHS = SE.getLosslessPtrToIntExpr(LHS);
15701+
RHS = SE.getLosslessPtrToIntExpr(RHS);
15702+
if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
15703+
break;
15704+
}
15705+
auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
15706+
const SCEV *Sub = SE.getMinusSCEV(A, B);
15707+
AddRewrite(Sub, Sub,
15708+
SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
15709+
};
15710+
AddSubRewrite(LHS, RHS);
15711+
AddSubRewrite(RHS, LHS);
15712+
continue;
1569815713
}
1569915714
break;
1570015715
default:

llvm/test/Analysis/ScalarEvolution/ptrtoint.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
382382
; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64
383383
; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
384384
; X64-NEXT: %i10 = sub i64 %i9, %i4
385-
; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
385+
; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
386386
; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
387387
; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable }
388388
; X64-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -393,7 +393,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
393393
; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable }
394394
; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char
395395
; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
396-
; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1
396+
; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2
397397
; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
398398
; X64-NEXT: Loop %bb6: Trip multiple is 1
399399
;
@@ -406,9 +406,9 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
406406
; X32-NEXT: %i8 = load i8, ptr %i7, align 1
407407
; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
408408
; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64
409-
; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
409+
; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
410410
; X32-NEXT: %i10 = sub i64 %i9, %i4
411-
; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
411+
; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
412412
; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
413413
; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
414414
; X32-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -419,7 +419,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
419419
; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable }
420420
; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char
421421
; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
422-
; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1
422+
; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2
423423
; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
424424
; X32-NEXT: Loop %bb6: Trip multiple is 1
425425
;
@@ -459,7 +459,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
459459
; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64
460460
; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
461461
; X64-NEXT: %i10 = sub i64 %i9, %i4
462-
; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
462+
; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
463463
; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
464464
; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable }
465465
; X64-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -470,7 +470,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
470470
; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable }
471471
; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped
472472
; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
473-
; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1
473+
; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2
474474
; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
475475
; X64-NEXT: Loop %bb6: Trip multiple is 1
476476
;
@@ -483,9 +483,9 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
483483
; X32-NEXT: %i8 = load i8, ptr %i7, align 1
484484
; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
485485
; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64
486-
; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
486+
; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
487487
; X32-NEXT: %i10 = sub i64 %i9, %i4
488-
; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
488+
; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
489489
; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
490490
; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
491491
; X32-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -496,7 +496,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
496496
; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable }
497497
; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped
498498
; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
499-
; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1
499+
; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2
500500
; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
501501
; X32-NEXT: Loop %bb6: Trip multiple is 1
502502
;
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -p indvars -S %s | FileCheck %s
3+
4+
declare i1 @cond()
5+
6+
; The entry block branches straight to %exit when %start == %end, so the loop
; is only entered with %start != %end. Each iteration advances %ptr.iv by one
; byte and %i64.iv by one; the loop exits when @cond() returns false or when
; %ptr.iv.next equals %end. The CHECK lines expect indvars to emit the %i64.iv
; increment as `add nuw`.
define i64 @test_ptr_compare_guard(ptr %start, ptr %end) {
7+
; CHECK-LABEL: define i64 @test_ptr_compare_guard(
8+
; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*]]:
10+
; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[START]], [[END]]
11+
; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
12+
; CHECK: [[LOOP_HEADER_PREHEADER]]:
13+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
14+
; CHECK: [[LOOP_HEADER]]:
15+
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[LOOP_HEADER_PREHEADER]] ]
16+
; CHECK-NEXT: [[I64_IV:%.*]] = phi i64 [ [[I64_IV_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ]
17+
; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond()
18+
; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[EXIT_LOOPEXIT:.*]]
19+
; CHECK: [[LOOP_LATCH]]:
20+
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1
21+
; CHECK-NEXT: [[I64_IV_NEXT]] = add nuw i64 [[I64_IV]], 1
22+
; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
23+
; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]]
24+
; CHECK: [[EXIT_LOOPEXIT]]:
25+
; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[I64_IV]], %[[LOOP_LATCH]] ]
26+
; CHECK-NEXT: br label %[[EXIT]]
27+
; CHECK: [[EXIT]]:
28+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RES_PH]], %[[EXIT_LOOPEXIT]] ]
29+
; CHECK-NEXT: ret i64 [[RES]]
30+
;
31+
entry:
32+
; Loop guard: the loop is skipped entirely when the two pointers are equal.
%c.0 = icmp eq ptr %start, %end
33+
br i1 %c.0, label %exit, label %loop.header
34+
35+
loop.header:
36+
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
37+
%i64.iv = phi i64 [ 0, %entry ], [ %i64.iv.next, %loop.latch ]
38+
; Early exit whose condition comes from an external call.
%c.1 = call i1 @cond()
39+
br i1 %c.1, label %loop.latch, label %exit
40+
41+
loop.latch:
42+
%ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
43+
; Written without wrap flags here; the CHECK lines expect `add nuw` after indvars.
%i64.iv.next = add i64 %i64.iv, 1
44+
%c.2 = icmp eq ptr %ptr.iv.next, %end
45+
br i1 %c.2, label %exit, label %loop.header
46+
47+
exit:
48+
%res = phi i64 [ 0, %entry ], [ %i64.iv, %loop.latch ], [ 0, %loop.header ]
49+
ret i64 %res
50+
}

llvm/test/Transforms/LoopUnroll/scevunroll.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,3 +435,63 @@ exit:
435435
}
436436

437437
declare void @fn(i32)
438+
439+
; The induction variable starts at %start and is incremented by 1; the latch
; continues while `%iv slt 100`. @fn is called only on iterations where
; %iv == %start. The CHECK lines expect one peeled iteration, after which the
; in-loop branch on that comparison becomes `br i1 false`, and the exit test is
; rewritten as `icmp ne` against smax(%start, 100) + 1.
define void @peel_int_eq_condition(i32 %start) {
440+
; CHECK-LABEL: @peel_int_eq_condition(
441+
; CHECK-NEXT: entry:
442+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[START:%.*]], i32 100)
443+
; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
444+
; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]]
445+
; CHECK: loop.peel.begin:
446+
; CHECK-NEXT: br label [[LOOP_PEEL:%.*]]
447+
; CHECK: loop.peel:
448+
; CHECK-NEXT: [[C_0_PEEL:%.*]] = icmp eq i32 [[START]], [[START]]
449+
; CHECK-NEXT: br i1 [[C_0_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[LOOP_LATCH_PEEL:%.*]]
450+
; CHECK: if.then.peel:
451+
; CHECK-NEXT: call void @fn(i32 [[START]])
452+
; CHECK-NEXT: br label [[LOOP_LATCH_PEEL]]
453+
; CHECK: loop.latch.peel:
454+
; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add i32 [[START]], 1
455+
; CHECK-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i32 [[IV_NEXT_PEEL]], [[TMP0]]
456+
; CHECK-NEXT: br i1 [[EXITCOND_PEEL]], label [[LOOP_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
457+
; CHECK: loop.peel.next:
458+
; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]]
459+
; CHECK: loop.peel.next1:
460+
; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
461+
; CHECK: entry.peel.newph:
462+
; CHECK-NEXT: br label [[LOOP:%.*]]
463+
; CHECK: loop:
464+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
465+
; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[LOOP_LATCH]]
466+
; CHECK: if.then:
467+
; CHECK-NEXT: call void @fn(i32 [[IV]])
468+
; CHECK-NEXT: br label [[LOOP_LATCH]]
469+
; CHECK: loop.latch:
470+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
471+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]]
472+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
473+
; CHECK: exit.loopexit:
474+
; CHECK-NEXT: br label [[EXIT]]
475+
; CHECK: exit:
476+
; CHECK-NEXT: ret void
477+
;
478+
entry:
479+
br label %loop
480+
481+
loop:
482+
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
483+
; Equality test against the IV's start value; this gates the call to @fn.
%c.0 = icmp eq i32 %iv, %start
484+
br i1 %c.0, label %if.then, label %loop.latch
485+
486+
if.then:
487+
call void @fn(i32 %iv)
488+
br label %loop.latch
489+
490+
loop.latch:
491+
%iv.next = add i32 %iv, 1
492+
; The exit test compares the pre-increment %iv, not %iv.next.
%ec = icmp slt i32 %iv, 100
493+
br i1 %ec, label %loop, label %exit
494+
495+
exit:
496+
ret void
497+
}

0 commit comments

Comments
 (0)