Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/TritonIntelGPU/blockptr_load.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ module attributes {"ttg.num-warps" = 1 : i32, "ttg.threads-per-warp" = 16 : i32,
// CHECK: %[[OFFSET_0:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[OFFSET_1:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue %[[VAL_11]][3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue %[[VAL_12]][4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[BASE:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
Expand Down Expand Up @@ -199,7 +199,7 @@ module attributes {"ttg.num-warps" = 1 : i32, "ttg.threads-per-warp" = 16 : i32,
// CHECK: %[[OFFSET_0:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[OFFSET_1:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue %[[VAL_10]][3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue %[[VAL_11]][4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
// CHECK: %[[BASE:.*]] = llvm.extractvalue %[[BLOCK_POINTER]][6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
Expand Down
56 changes: 44 additions & 12 deletions third_party/intel/lib/TritonIntelGPUToLLVM/LoadStoreOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,42 @@ static int __builtin_ctz(unsigned x) {

namespace {

/// Looks through a single cast. When `v` is produced by an LLVM dialect
/// trunc, sext, zext or bitcast op, the first operand of that op is returned;
/// in every other case `v` is handed back unchanged. Only one level is peeled
/// per call.
static Value skipCasts(Value v) {
  Operation *producer = v.getDefiningOp();
  // getDefiningOp() returned null: there is no op to look through.
  if (!producer)
    return v;

  const bool isCast =
      isa<LLVM::TruncOp, LLVM::SExtOp, LLVM::ZExtOp, LLVM::BitcastOp>(producer);
  return isCast ? producer->getOperand(0) : v;
}

// Tries to fold the operation that defines `v`.
// Returns the folded Value when the fold succeeds with exactly one result and
// that result is a Value; otherwise returns `v` unchanged (including when `v`
// has no defining op).
// NOTE(review): Operation::fold may be allowed to update the op in place —
// confirm that is acceptable for the callers of this helper.
static Value tryFoldOp(Value v) {
if (Operation *def = v.getDefiningOp()) {
SmallVector<OpFoldResult> results;
// Only a successful fold that produced exactly one result is considered.
if (succeeded(def->fold(results)) && results.size() == 1) {
// A result that is not a Value (e.g. an Attribute) is ignored here and the
// function falls through to return the original `v`.
if (auto val = dyn_cast_or_null<Value>(results[0]))
return val;
}
Comment on lines +53 to +56
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fold result could be an Attribute containing a constant value, not just a Value. If results[0] is an Attribute, it should be converted to a constant Value rather than being ignored. This would cause tryConstEval to miss foldable constant attributes.

Copilot uses AI. Check for mistakes.
}
// No defining op, fold failed, or the fold result was not a single Value.
return v;
}

// Tries to evaluate `v` to a constant integer.
// Each iteration first asks getConstantIntValue(v); if that yields a value it
// is returned. Otherwise `v` is replaced by tryFoldOp(skipCasts(v)) and the
// loop continues, for at most `depth` iterations (default 16).
// Returns std::nullopt when an iteration makes no progress or when the
// iteration budget is used up.
// NOTE(review): skipCasts looks through trunc/sext/zext/bitcast without
// adjusting the value, so the returned constant is that of the cast's source
// operand — confirm callers are fine with that.
// NOTE(review): the value reached on the last iteration (after `v = newV`) is
// never passed to getConstantIntValue, so `depth` bounds the number of checks
// rather than the number of skips.
static std::optional<int64_t> tryConstEval(Value v, int depth = 16) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we port these changes to Utility.cpp and improve mlir::triton::intel::getFoldedConstantValue??

Copy link
Contributor Author

@januszjah januszjah Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it will increase work done while compilation because getFoldedConstantValue is used in isConstant in many places and my version does multiple skips and folds instead of single fold, however IDK if it's a problem and how obsessive we are about cycles at this step.

for (int i = 0; i < depth; ++i) {
// Done as soon as the current value is recognized as a constant integer.
if (auto res = getConstantIntValue(v))
return res;

// Peel one cast, then try to fold whatever defines the peeled value.
Value newV = skipCasts(v);
newV = tryFoldOp(newV);

// Neither step changed the value: further iterations cannot make progress.
if (newV == v)
break;

v = newV;
}

return std::nullopt;
}

Value maybeAnd(RewriterBase &rewriter, Location loc, Value a, Value b) {
auto tb = TritonLLVMOpBuilder(loc, rewriter);
if (a && b) {
Expand Down Expand Up @@ -1590,23 +1626,19 @@ struct LoadOpToBlockIOConversion
std::swap(baseWidth, baseHeight);
}
// HW requires the pitch to be at least 64 bytes.
std::function<Value(Value)> skipTrunc = [&](Value v) {
if (dyn_cast_or_null<LLVM::TruncOp>(v.getDefiningOp()))
return skipTrunc(v.getDefiningOp()->getOperand(0));
return v;
};
if (Operation *op = skipTrunc(pitch).getDefiningOp()) {
std::optional<int64_t> pitchConst =
mlir::triton::intel::getFoldedConstantValue(op);
if (pitchConst.has_value()) {
if ((*pitchConst * elemSizeInBits / 8) < 64)
return failure();
}
if (auto pitchConst = tryConstEval(pitch)) {
if ((*pitchConst * elemSizeInBits / 8) < 64)
return failure();
}

baseWidth = b.trunc(i32_ty, baseWidth);
baseHeight = b.trunc(i32_ty, baseHeight);

if (auto widthConst = tryConstEval(baseWidth)) {
if ((*widthConst * elemSizeInBits / 8) < 64)
return failure();
}

const unsigned originalElemBits = elemSizeInBits;
if (isTransposeRequired) {
// adjust the block io parameter to align HW's limitations on
Expand Down
Loading