Skip to content

Commit ee66714

Browse files
committed
[LAA] Check if Ptr can be freed between Assume and CtxI. (llvm#161725)
When using information from dereferenceable assumptions, we need to make sure that the memory is not freed between the assume and the specified context instruction. Instead of just checking canBeFreed, check if there are any calls that may free between the assume and the context instruction. This patch introduces willNotFreeBetween to check for calls that may free between an assume and a context instruction; it will also be used in llvm#161255. PR: llvm#161725 (cherry picked from commit 7ceef76)
1 parent d67f605 commit ee66714

File tree

4 files changed

+177
-8
lines changed

4 files changed

+177
-8
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,12 @@ LLVM_ABI bool isValidAssumeForContext(const Instruction *I,
618618
const DominatorTree *DT = nullptr,
619619
bool AllowEphemerals = false);
620620

621+
/// Returns true if no instruction between \p Assume and \p CtxI may free
622+
/// memory and the function is marked as NoSync. The latter ensures the current
623+
/// function cannot arrange for another thread to free on its behalf.
624+
LLVM_ABI bool willNotFreeBetween(const Instruction *Assume,
625+
const Instruction *CtxI);
626+
621627
enum class OverflowResult {
622628
/// Always overflows in the direction of signed/unsigned min value.
623629
AlwaysOverflowsLow,

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,14 +233,25 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
233233
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
234234

235235
// Check if we have a suitable dereferenceable assumption we can use.
236-
if (!StartPtrV->canBeFreed()) {
237-
RetainedKnowledge DerefRK = getKnowledgeValidInContext(
238-
StartPtrV, {Attribute::Dereferenceable}, *AC,
239-
L->getLoopPredecessor()->getTerminator(), DT);
240-
if (DerefRK) {
241-
DerefBytesSCEV =
242-
SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue));
243-
}
236+
Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
237+
if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
238+
if (isa<BranchInst>(LoopPred->getTerminator()))
239+
CtxI = LoopPred->getTerminator();
240+
}
241+
RetainedKnowledge DerefRK;
242+
getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
243+
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
244+
if (!isValidAssumeForContext(Assume, CtxI, DT))
245+
return false;
246+
if (StartPtrV->canBeFreed() &&
247+
!willNotFreeBetween(Assume, CtxI))
248+
return false;
249+
DerefRK = std::max(DerefRK, RK);
250+
return true;
251+
});
252+
if (DerefRK) {
253+
DerefBytesSCEV =
254+
SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue));
244255
}
245256

246257
if (DerefBytesSCEV->isZero())

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ using namespace llvm::PatternMatch;
9090
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
9191
cl::Hidden, cl::init(20));
9292

93+
/// Maximum number of instructions to check between assume and context
94+
/// instruction.
95+
static constexpr unsigned MaxInstrsToCheckForFree = 16;
9396

9497
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
9598
/// returns the element type's bitwidth.
@@ -552,6 +555,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
552555
return false;
553556
}
554557

558+
bool llvm::willNotFreeBetween(const Instruction *Assume,
559+
const Instruction *CtxI) {
560+
if (CtxI->getParent() != Assume->getParent() || !Assume->comesBefore(CtxI))
561+
return false;
562+
// Make sure the current function cannot arrange for another thread to free on
563+
// its behalf.
564+
if (!CtxI->getFunction()->hasNoSync())
565+
return false;
566+
567+
// Check if there are any calls between the assume and CtxI that may
568+
// free memory.
569+
for (const auto &[Idx, I] :
570+
enumerate(make_range(Assume->getIterator(), CtxI->getIterator()))) {
571+
// Limit number of instructions to walk.
572+
if (Idx > MaxInstrsToCheckForFree)
573+
return false;
574+
if (const auto *CB = dyn_cast<CallBase>(&I))
575+
if (!CB->hasFnAttr(Attribute::NoFree))
576+
return false;
577+
}
578+
return true;
579+
}
580+
555581
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
556582
// we still have enough information about `RHS` to conclude non-zero. For
557583
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops

llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,3 +624,129 @@ e.1:
624624
e.2:
625625
ret void
626626
}
627+
628+
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_nofree_via_context(ptr %A, ptr %B) nosync {
629+
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_nofree_via_context'
630+
; CHECK-NEXT: loop.header:
631+
; CHECK-NEXT: Memory dependences are safe with run-time checks
632+
; CHECK-NEXT: Dependences:
633+
; CHECK-NEXT: Run-time memory checks:
634+
; CHECK-NEXT: Check 0:
635+
; CHECK-NEXT: Comparing group GRP0:
636+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
637+
; CHECK-NEXT: Against group GRP1:
638+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
639+
; CHECK-NEXT: Grouped accesses:
640+
; CHECK-NEXT: Group GRP0:
641+
; CHECK-NEXT: (Low: %B High: (2000 + %B))
642+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
643+
; CHECK-NEXT: Group GRP1:
644+
; CHECK-NEXT: (Low: %A High: (2000 + %A))
645+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
646+
; CHECK-EMPTY:
647+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
648+
; CHECK-NEXT: SCEV assumptions:
649+
; CHECK-EMPTY:
650+
; CHECK-NEXT: Expressions re-written:
651+
;
652+
entry:
653+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ]
654+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ]
655+
br label %loop.header
656+
657+
loop.header:
658+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
659+
%gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
660+
%gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
661+
%l = load i32, ptr %gep.A, align 4
662+
store i32 0, ptr %gep.B, align 4
663+
%cntable.c.1 = icmp ult i64 %iv, 1000
664+
%iv.next = add nuw nsw i64 %iv, 1
665+
br i1 %cntable.c.1, label %b2, label %e.1
666+
667+
b2:
668+
%uncntable.c.0 = icmp eq i32 %l, 0
669+
br i1 %uncntable.c.0, label %e.2, label %b3
670+
671+
b3:
672+
%cntable.c.2 = icmp eq i64 %iv.next, 500
673+
br i1 %cntable.c.2, label %cleanup4, label %latch
674+
675+
latch:
676+
br label %loop.header
677+
678+
cleanup4:
679+
ret void
680+
681+
e.1:
682+
ret void
683+
684+
e.2:
685+
ret void
686+
}
687+
688+
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_missing_nofree_multiple_predecessors(ptr %A, ptr %B, i1 %c) nosync {
689+
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_missing_nofree_multiple_predecessors'
690+
; CHECK-NEXT: loop.header:
691+
; CHECK-NEXT: Memory dependences are safe with run-time checks
692+
; CHECK-NEXT: Dependences:
693+
; CHECK-NEXT: Run-time memory checks:
694+
; CHECK-NEXT: Check 0:
695+
; CHECK-NEXT: Comparing group GRP0:
696+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
697+
; CHECK-NEXT: Against group GRP1:
698+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
699+
; CHECK-NEXT: Grouped accesses:
700+
; CHECK-NEXT: Group GRP0:
701+
; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
702+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
703+
; CHECK-NEXT: Group GRP1:
704+
; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
705+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
706+
; CHECK-EMPTY:
707+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
708+
; CHECK-NEXT: SCEV assumptions:
709+
; CHECK-EMPTY:
710+
; CHECK-NEXT: Expressions re-written:
711+
;
712+
entry:
713+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ]
714+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ]
715+
br i1 %c, label %then, label %else
716+
717+
then:
718+
br label %loop.header
719+
720+
else:
721+
br label %loop.header
722+
723+
loop.header:
724+
%iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %latch ]
725+
%gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
726+
%gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
727+
%l = load i32, ptr %gep.A, align 4
728+
store i32 0, ptr %gep.B, align 4
729+
%cntable.c.1 = icmp ult i64 %iv, 1000
730+
%iv.next = add nuw nsw i64 %iv, 1
731+
br i1 %cntable.c.1, label %b2, label %e.1
732+
733+
b2:
734+
%uncntable.c.0 = icmp eq i32 %l, 0
735+
br i1 %uncntable.c.0, label %e.2, label %b3
736+
737+
b3:
738+
%cntable.c.2 = icmp eq i64 %iv.next, 500
739+
br i1 %cntable.c.2, label %cleanup4, label %latch
740+
741+
latch:
742+
br label %loop.header
743+
744+
cleanup4:
745+
ret void
746+
747+
e.1:
748+
ret void
749+
750+
e.2:
751+
ret void
752+
}

0 commit comments

Comments
 (0)