[TTI][RISCV] Add cost modelling for intrinsic vp.load.ff (#169890)
This patch is a rework of #160470 (which was reverted). With getMemIntrinsicCost() now available, we can re-land the change and reduce vp_load_ff boilerplate.
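
For context, below is a standalone sketch (not part of this patch) of how the new cost can be inspected with opt's cost-model printer. The RUN line, the function name, and the <4 x i8> element type are illustrative assumptions rather than copies from the patch; the actual coverage added by the patch is the vp-intrinsics.ll diff further down.

; Illustrative only: query the RISC-V cost model for a single vp.load.ff call.
; RUN: opt < %s -passes="print<cost-model>" -disable-output -mtriple=riscv64 -mattr=+v 2>&1 | FileCheck %s

define void @vp_load_ff_example(ptr %src, <4 x i1> %mask, i32 %evl) {
; CHECK: Cost Model: Found an estimated cost of {{[0-9]+}} for instruction: %ff = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0
  ; Fault-only-first load: returns the loaded vector plus the new EVL.
  %ff = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> %mask, i32 %evl)
  ret void
}

declare { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr, <4 x i1>, i32)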
llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll (65 additions, 65 deletions)
@@ -836,74 +836,74 @@ define void @abs() {
   ret void
 }

-define void @load() {
+define void @load(ptr %src) {
 ; CHECK-LABEL: 'load'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
-  %t1 = load <2 x i8>, ptr undef
-  %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
-  %t3 = load <4 x i8>, ptr undef
-  %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
-  %t5 = load <8 x i8>, ptr undef
-  %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
-  %t7 = load <16 x i8>, ptr undef
-  %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
-  %t9 = load <2 x i64>, ptr undef
-  %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
-  %t12 = load <4 x i64>, ptr undef
-  %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
-  %t14 = load <8 x i64>, ptr undef
-  %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
-  %t16 = load <16 x i64>, ptr undef
-  %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t18 = load <vscale x 2 x i8>, ptr undef
-  %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t20 = load <vscale x 4 x i8>, ptr undef
-  %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t22 = load <vscale x 8 x i8>, ptr undef
-  %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t24 = load <vscale x 16 x i8>, ptr undef
-  %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t26 = load <vscale x 2 x i64>, ptr undef
-  %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t28 = load <vscale x 4 x i64>, ptr undef
-  %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t30 = load <vscale x 8 x i64>, ptr undef
-  %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t32 = load <vscale x 16 x i64>, ptr undef
+  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
+  %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+  %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
+  %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+  %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
+  %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+  %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
+  %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+  %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
+  %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+  %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
+  %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+  %t12 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
+  %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+  %t14 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
+  %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+  %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+  %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+  %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+  %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+  %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+  %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+  %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+  %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+  %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+  %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+  %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+  %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+  %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+  %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+  %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+  %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)