Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions llvm/lib/Target/VE/CustomDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ SDValue CustomDAG::createUniformConstMask(Packing Packing, unsigned NumElements,
if (IsTrue)
return Res;

return DAG.getNOT(DL, Res, Res.getValueType());
return createNot(Res, Res.getValueType());
}

SDValue CustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget,
Expand Down Expand Up @@ -1360,7 +1360,7 @@ SDValue CustomDAG::getLegalReductionOpVVP(unsigned VVPOpcode, EVT ResVT,
case VEISD::VVP_REDUCE_UMAX:
case VEISD::VVP_REDUCE_SMIN:
case VEISD::VVP_REDUCE_OR: {
// Mask legalization using vm_popcount
// Mask out the off lanes (AND the vector with the mask).
if (!isAllTrueMask(Mask))
VectorV = getNode(ISD::AND, Mask.getValueType(), {VectorV, Mask});

Expand All @@ -1377,12 +1377,13 @@ SDValue CustomDAG::getLegalReductionOpVVP(unsigned VVPOpcode, EVT ResVT,
case VEISD::VVP_REDUCE_SMAX:
case VEISD::VVP_REDUCE_MUL:
case VEISD::VVP_REDUCE_AND: {
// TODO: Invert and OR the mask, then compare PCVM against AVL.
// Invert the mask and OR it into the vector, forcing the off lanes to true.
if (!isAllTrueMask(Mask)) {
auto InverseMask = createNot(Mask, Mask.getValueType());
VectorV = getNode(ISD::OR, Mask.getValueType(), {InverseMask, VectorV});
}

// Mask legalization using vm_popcount
if (!isAllTrueMask(Mask))
VectorV = getNode(ISD::AND, Mask.getValueType(), {VectorV, Mask});

auto Pop = createMaskPopcount(VectorV, AVL);
auto LegalPop = DAG.getZExtOrTrunc(Pop, DL, MVT::i32);

Expand Down
40 changes: 36 additions & 4 deletions llvm/test/CodeGen/VE/Packed/vp-reduce-v512i1-mask-avl-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,46 @@
; RUN: llc -O0 --march=ve -mattr=+packed,+vpu %s -o=/dev/stdout | FileCheck %s

define fastcc i1 @test_reduce_and(i1 %s, <512 x i1> %v, <512 x i1> %m, i32 %n) {
; FIXME: The generated code is incorrect!
; CHECK-LABEL: test_reduce_and:
; CHECK: # %bb.0:
; CHECK-NEXT: adds.l %s11, -32, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: svm %s16, %vm4, 0
; CHECK-NEXT: st %s16, (, %s11)
; CHECK-NEXT: svm %s16, %vm4, 1
; CHECK-NEXT: st %s16, 8(, %s11)
; CHECK-NEXT: svm %s16, %vm4, 2
; CHECK-NEXT: st %s16, 16(, %s11)
; CHECK-NEXT: svm %s16, %vm4, 3
; CHECK-NEXT: st %s16, 24(, %s11) # 32-byte Folded Spill
; CHECK-NEXT: andm %vm4, %vm0, %vm2
; CHECK-NEXT: ld %s16, (, %s11)
; CHECK-NEXT: lvm %vm2, 0, %s16
; CHECK-NEXT: ld %s16, 8(, %s11)
; CHECK-NEXT: lvm %vm2, 1, %s16
; CHECK-NEXT: ld %s16, 16(, %s11)
; CHECK-NEXT: lvm %vm2, 2, %s16
; CHECK-NEXT: ld %s16, 24(, %s11) # 32-byte Folded Reload
; CHECK-NEXT: lvm %vm2, 3, %s16
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: or %s2, 0, %s1
; CHECK-NEXT: andm %vm1, %vm2, %vm4
; CHECK-NEXT: andm %vm1, %vm0, %vm0
; CHECK-NEXT: xorm %vm2, %vm2, %vm1
; CHECK-NEXT: orm %vm2, %vm2, %vm4
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: pcvm %s3, %vm1
; CHECK-NEXT: andm %vm1, %vm3, %vm5
; CHECK-NEXT: pcvm %s3, %vm2
; CHECK-NEXT: xorm %vm1, %vm5, %vm1
; CHECK-NEXT: orm %vm1, %vm1, %vm3
; CHECK-NEXT: pcvm %s1, %vm1
; CHECK-NEXT: adds.l %s1, %s1, %s3
; CHECK-NEXT: # kill: def $sw1 killed $sw1 killed $sx1
Expand All @@ -23,6 +54,7 @@ define fastcc i1 @test_reduce_and(i1 %s, <512 x i1> %v, <512 x i1> %m, i32 %n) {
; CHECK-NEXT: # implicit-def: $sx1
; CHECK-NEXT: or %s1, 0, %s2
; CHECK-NEXT: and %s0, %s0, %s1
; CHECK-NEXT: adds.l %s11, 32, %s11
; CHECK-NEXT: b.l.t (, %s10)
%r = call i1 @llvm.vp.reduce.and.v512i1(i1 %s, <512 x i1> %v, <512 x i1> %m, i32 %n)
ret i1 %r
Expand Down
35 changes: 33 additions & 2 deletions llvm/test/CodeGen/VE/Vector/vp-reduce-v256i1-mask-avl-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,42 @@
; RUN: llc -O0 --march=ve -mattr=-packed,+vpu %s -o=/dev/stdout | FileCheck %s

define fastcc i1 @test_reduce_and(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %n) {
; FIXME: The generated code is incorrect!
; CHECK-LABEL: test_reduce_and:
; CHECK: # %bb.0:
; CHECK-NEXT: adds.l %s11, -32, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, (, %s11)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 8(, %s11)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 16(, %s11)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 24(, %s11) # 32-byte Folded Spill
; CHECK-NEXT: andm %vm2, %vm0, %vm1
; CHECK-NEXT: ld %s16, (, %s11)
; CHECK-NEXT: lvm %vm1, 0, %s16
; CHECK-NEXT: ld %s16, 8(, %s11)
; CHECK-NEXT: lvm %vm1, 1, %s16
; CHECK-NEXT: ld %s16, 16(, %s11)
; CHECK-NEXT: lvm %vm1, 2, %s16
; CHECK-NEXT: ld %s16, 24(, %s11) # 32-byte Folded Reload
; CHECK-NEXT: lvm %vm1, 3, %s16
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: or %s2, 0, %s1
; CHECK-NEXT: andm %vm1, %vm1, %vm2
; CHECK-NEXT: andm %vm3, %vm0, %vm0
; CHECK-NEXT: xorm %vm1, %vm1, %vm3
; CHECK-NEXT: orm %vm1, %vm1, %vm2
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: pcvm %s1, %vm1
; CHECK-NEXT: # kill: def $sw1 killed $sw1 killed $sx1
Expand All @@ -20,6 +50,7 @@ define fastcc i1 @test_reduce_and(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %n) {
; CHECK-NEXT: # implicit-def: $sx1
; CHECK-NEXT: or %s1, 0, %s2
; CHECK-NEXT: and %s0, %s0, %s1
; CHECK-NEXT: adds.l %s11, 32, %s11
; CHECK-NEXT: b.l.t (, %s10)
%r = call i1 @llvm.vp.reduce.and.v256i1(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %n)
ret i1 %r
Expand Down