Skip to content

Commit 4cedf09

Browse files
bokrzesiigcbot
authored and committed
Revert "[IGC Core][LLVM16][StatelessToStateful] DeterminePointerAlignment algorithm fix v2"
Revert "[IGC Core][LLVM16][StatelessToStateful] DeterminePointerAlignment algorithm fix v2"
1 parent a020b4e commit 4cedf09

File tree

13 files changed

+73
-307
lines changed

13 files changed

+73
-307
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.cpp

Lines changed: 63 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -520,9 +520,61 @@ bool StatelessToStateful::pointerIsFromKernelArgument(Value &ptr) {
520520
return false;
521521
}
522522

523-
bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(
524-
Function *F, Value *V, Value *&offset, unsigned int &argNumber, bool ignoreSyncBuffer,
525-
std::optional<llvm::Align> OriginalInstructionAlignment) {
523+
// Estimates the alignment, in bytes, of the pointer value Ptr.
//
// The result is the largest alignment suggested by any of these sources:
//   - the explicit alignment of every load that uses Ptr and of every store
//     that writes through Ptr,
//   - the ABI alignment (per DL) of the source element type of every GEP
//     that uses Ptr, provided that type is sized (this is a guess, not a
//     proof),
//   - the alignment parameter attribute, when Ptr is a function argument,
//   - LLVM's getKnownAlignment() evaluated at InsertionPt with the
//     assumption cache AC.
//
// The returned value is never smaller than 1.
static alignment_t determinePointerAlignment(Value *Ptr, const DataLayout &DL, AssumptionCache *AC,
                                             Instruction *InsertionPt) {
  alignment_t Result = 1;

  // Keeps Result at the maximum of all candidates seen so far.
  auto raiseTo = [&Result](alignment_t Candidate) {
    if (Candidate > Result)
      Result = Candidate;
  };

  // Step 1: collect alignment hints from the instructions that use Ptr.
  for (User *PtrUser : Ptr->users()) {
    if (auto *Load = dyn_cast<LoadInst>(PtrUser)) {
      // A load always carries an explicit alignment.
      raiseTo(Load->getAlign().value());
      continue;
    }

    if (auto *Store = dyn_cast<StoreInst>(PtrUser)) {
      // Only count the store when Ptr is the address being written to,
      // not when Ptr is the value being stored.
      if (Store->getPointerOperand() == Ptr)
        raiseTo(Store->getAlign().value());
      continue;
    }

    if (auto *Gep = dyn_cast<GetElementPtrInst>(PtrUser)) {
      // Guess the alignment from the ABI alignment of the element type the
      // GEP indexes over, as long as that type has a known size.
      Type *ElemTy = Gep->getSourceElementType();
      if (ElemTy && ElemTy->isSized())
        raiseTo(DL.getABITypeAlign(ElemTy).value());
    }
  }

  // Step 2: a function argument may carry an explicit alignment attribute.
  auto *Arg = dyn_cast<Argument>(Ptr);
  if (Arg && Arg->hasAttribute(llvm::Attribute::Alignment)) {
    MaybeAlign ParamAlign = Arg->getParamAlign();
    if (ParamAlign)
      raiseTo(ParamAlign.valueOrOne().value());
  }

  // Step 3: fall back to LLVM's own alignment analysis, which (among other
  // things) takes llvm.assume intrinsics into account.
  Align Known = getKnownAlignment(Ptr, DL, InsertionPt, AC);
  if (Known > Result)
    Result = Known.value();

  return Result;
}
575+
576+
bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(Function *F, Value *V, Value *&offset,
577+
unsigned int &argNumber, bool ignoreSyncBuffer) {
526578
const DataLayout *DL = &F->getParent()->getDataLayout();
527579

528580
AssumptionCache *AC = getAC(F);
@@ -572,20 +624,9 @@ bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(
572624
// guaranteed to be DW-aligned.)
573625
//
574626
// Note that implicit arg is always aligned.
575-
bool isAlignedPointee = false;
576-
577-
if (arg->isImplicitArg()) {
578-
isAlignedPointee = true;
579-
} else {
580-
auto knownAlignment = getKnownAlignment((Value *)arg->getArg(), *DL, F->getEntryBlock().getFirstNonPHI(), AC);
581-
582-
if (knownAlignment >= 4) {
583-
isAlignedPointee = true;
584-
} else if (OriginalInstructionAlignment.has_value() &&
585-
IGC::isStatefulAddrSpace(V->getType()->getPointerAddressSpace())) {
586-
isAlignedPointee = OriginalInstructionAlignment.value() >= 4;
587-
}
588-
}
627+
bool isAlignedPointee = arg->isImplicitArg()
628+
? true
629+
: determinePointerAlignment(base, *DL, AC, F->getEntryBlock().getFirstNonPHI()) >= 4;
589630

590631
// If m_hasBufferOffsetArg is true, the offset argument is added to
591632
// the final offset to make it definitely positive. Thus skip checking
@@ -614,7 +655,6 @@ bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(
614655
updateArgInfo(arg, gepProducesPositivePointer);
615656
}
616657
}
617-
618658
if ((m_hasBufferOffsetArg || (gepProducesPositivePointer && isAlignedPointee)) &&
619659
getOffsetFromGEP(F, GEPs, argNumber, arg->isImplicitArg(), offset)) {
620660
return true;
@@ -880,14 +920,12 @@ void StatelessToStateful::promote() {
880920
resAllocMD->uavsNumType += m_promotionMap.size();
881921
}
882922

883-
void StatelessToStateful::addToPromotionMap(Instruction &I, Value *Ptr,
884-
std::optional<llvm::Align> OriginalInstructionAlignment = std::nullopt) {
923+
void StatelessToStateful::addToPromotionMap(Instruction &I, Value *Ptr) {
885924
Value *offset = nullptr;
886925
unsigned baseArgNumber = 0;
887926

888-
bool isPromotable =
889-
m_promotionMap.size() < maxPromotionCount &&
890-
pointerIsPositiveOffsetFromKernelArgument(m_F, Ptr, offset, baseArgNumber, true, OriginalInstructionAlignment);
927+
bool isPromotable = m_promotionMap.size() < maxPromotionCount &&
928+
pointerIsPositiveOffsetFromKernelArgument(m_F, Ptr, offset, baseArgNumber, true);
891929

892930
if (isPromotable) {
893931
InstructionInfo II(&I, Ptr, offset);
@@ -986,7 +1024,7 @@ void StatelessToStateful::visitCallInst(CallInst &I) {
9861024

9871025
void StatelessToStateful::visitLoadInst(LoadInst &I) {
9881026
Value *ptr = I.getPointerOperand();
989-
addToPromotionMap(I, ptr, I.getAlign());
1027+
addToPromotionMap(I, ptr);
9901028

9911029
// check if there's non-kernel-arg load/store
9921030
if (IGC_IS_FLAG_ENABLED(DumpHasNonKernelArgLdSt) && ptr != nullptr && !pointerIsFromKernelArgument(*ptr)) {
@@ -998,7 +1036,7 @@ void StatelessToStateful::visitLoadInst(LoadInst &I) {
9981036

9991037
void StatelessToStateful::visitStoreInst(StoreInst &I) {
10001038
Value *ptr = I.getPointerOperand();
1001-
addToPromotionMap(I, ptr, I.getAlign());
1039+
addToPromotionMap(I, ptr);
10021040

10031041
if (IGC_IS_FLAG_ENABLED(DumpHasNonKernelArgLdSt) && ptr != nullptr && !pointerIsFromKernelArgument(*ptr)) {
10041042
ModuleMetaData *modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.hpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,7 @@ class StatelessToStateful : public llvm::ModulePass, public llvm::InstVisitor<St
8282
bool getModuleUsesBindless();
8383

8484
void findPromotableInstructions();
85-
void addToPromotionMap(llvm::Instruction &I, llvm::Value *Ptr,
86-
std::optional<llvm::Align> OriginalInstructionAlignment);
85+
void addToPromotionMap(llvm::Instruction &I, llvm::Value *Ptr);
8786

8887
void promote();
8988
void promoteInstruction(InstructionInfo &InstInfo);
@@ -107,8 +106,7 @@ class StatelessToStateful : public llvm::ModulePass, public llvm::InstVisitor<St
107106
// ignoreSyncBuffer - when set to true, return false directly if V is from the implicit kernel
108107
// argument "sync buffer". sync buffer must be stateless access in ZEBinary path so cannot be promoted.
109108
bool pointerIsPositiveOffsetFromKernelArgument(llvm::Function *F, llvm::Value *V, llvm::Value *&offset,
110-
unsigned int &argNumber, bool ignoreSyncBuffer,
111-
std::optional<llvm::Align> OriginalInstructionAlignment);
109+
unsigned int &argNumber, bool ignoreSyncBuffer);
112110

113111
// Check if the given pointer value can be traced back to any kernel argument.
114112
// return the kernel argument if found, otherwise return nullptr.

IGC/Compiler/tests/StatelessToStateful/Bindful/buffer_image-ro-buffer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
2525

2626
%spirv.Image._void_1_0_0_0_0_0_0 = type opaque
2727

28-
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, %spirv.Image._void_1_0_0_0_0_0_0 addrspace(1)* %img, i32 addrspace(1)* align 4 %dst) {
28+
define spir_kernel void @test(i32 addrspace(1)* %srcA, %spirv.Image._void_1_0_0_0_0_0_0 addrspace(1)* %img, i32 addrspace(1)* %dst) {
2929
entry:
3030
; BTI for image has been assigned by OCLBIConverter
3131
%color = call <4 x i32> @llvm.genx.GenISA.ldptr.v4i32.p196608f32.p196608f32(i32 0, i32 0, i32 0, i32 0, float addrspace(196608)* undef, float addrspace(196608)* null, i32 0, i32 0, i32 0)

IGC/Compiler/tests/StatelessToStateful/Bindful/buffer_image-wo_buffer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
2020

2121
%spirv.Image._void_1_0_0_0_0_0_1 = type opaque
2222

23-
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, %spirv.Image._void_1_0_0_0_0_0_1 addrspace(1)* %img, i32 addrspace(1)* align 4 %dst) {
23+
define spir_kernel void @test(i32 addrspace(1)* %srcA, %spirv.Image._void_1_0_0_0_0_0_1 addrspace(1)* %img, i32 addrspace(1)* %dst) {
2424
entry:
2525
; BTI for image has been assigned by OCLBIConverter
2626
call void @llvm.genx.GenISA.typedwrite.p131072f32(float addrspace(131072)* null, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)

IGC/Compiler/tests/StatelessToStateful/Bindful/buffer_scalar_buffer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32"
1919
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
2020

21-
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, i32 %srcB, i32 addrspace(1)* align 4 %dst) {
21+
define spir_kernel void @test(i32 addrspace(1)* %srcA, i32 %srcB, i32 addrspace(1)* %dst) {
2222
entry:
2323
; CHECK: %[[VAL_A:[0-9]+]] = load i32, i32 addrspace(131072)* %{{.*}}, align 4
2424
%ptrA = getelementptr inbounds i32, i32 addrspace(1)* %srcA, i64 1

IGC/Compiler/tests/StatelessToStateful/Bindful/has_non_kernel_arg_LdSt.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
; CHECK: CheckModuleDebugify: PASS
2020

2121

22-
define spir_kernel void @func_b(i32 %n, i32 addrspace(1)* align 4 %r, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
22+
define spir_kernel void @func_b(i32 %n, i32 addrspace(1)* %r, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
2323
; CHECK-LABEL: @func_b(
2424
; CHECK-NEXT: entry:
2525
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[R:%.*]], i32 16, !dbg [[DBG104:![0-9]+]]

IGC/Compiler/tests/StatelessToStateful/Bindful/hoist_loads_typed_pointers.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
; RUN: igc_opt --typed-pointers %s -S -o - -igc-stateless-to-stateful-resolution | FileCheck %s
1111

12-
define spir_kernel void @func_with_phinode_1(i1 %n, i32 addrspace(1)* align 4 %r, <4 x i32> addrspace(1)* %otp, i64 %idx1, i64 %idx2, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
12+
define spir_kernel void @func_with_phinode_1(i1 %n, i32 addrspace(1)* %r, <4 x i32> addrspace(1)* %otp, i64 %idx1, i64 %idx2, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
1313
bb1:
1414
%add.ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %r, i64 16
1515
%add.ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %r, i64 128

IGC/Compiler/tests/StatelessToStateful/Bindful/simd_block_read.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32"
1717
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
1818

19-
define spir_kernel void @test(i32 addrspace(1)* align 4 %in, i32 addrspace(1)* align 4 %out) {
19+
define spir_kernel void @test(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
2020
entry:
2121
%ptrIn = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 1
2222
; CHECK: %[[V:[0-9]+]] = call i32 @llvm.genx.GenISA.simdBlockRead.i32.p131072i32(i32 addrspace(131072)* %{{.*}})

IGC/Compiler/tests/StatelessToStateful/Bindful/simd_block_write.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32"
1717
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
1818

19-
define spir_kernel void @test(i32 addrspace(1)* align 4 %in, i32 addrspace(1)* align 4 %out) {
19+
define spir_kernel void @test(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
2020
entry:
2121
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
2222
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 1

IGC/Compiler/tests/StatelessToStateful/Bindful/unused_buffer_argument.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32-p131074:32:32:32"
2121
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
2222

23-
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, i32 addrspace(1)* align 4 %srcB, i32 addrspace(1)* align 4 %srcC, i32 addrspace(1)* align 4 %dst) {
23+
define spir_kernel void @test(i32 addrspace(1)* %srcA, i32 addrspace(1)* %srcB, i32 addrspace(1)* %srcC, i32 addrspace(1)* %dst) {
2424
entry:
2525
%ptrA = getelementptr inbounds i32, i32 addrspace(1)* %srcA, i64 1
2626
; CHECK: %[[VAL_A:[0-9]+]] = load i32, i32 addrspace(131072)* %{{.*}}, align 4

0 commit comments

Comments
 (0)