@@ -10,12 +10,9 @@ define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
1010; CHECK-LABEL: sabd_b:
1111; CHECK: # %bb.0:
1212; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
13- ; CHECK-NEXT: vwsub.vv v12, v8, v10
14- ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
15- ; CHECK-NEXT: vrsub.vi v8, v12, 0
16- ; CHECK-NEXT: vmax.vv v12, v12, v8
17- ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
18- ; CHECK-NEXT: vnsrl.wi v8, v12, 0
13+ ; CHECK-NEXT: vmin.vv v12, v8, v10
14+ ; CHECK-NEXT: vmax.vv v8, v8, v10
15+ ; CHECK-NEXT: vsub.vv v8, v8, v12
1916; CHECK-NEXT: ret
2017 %a.sext = sext <vscale x 16 x i8 > %a to <vscale x 16 x i16 >
2118 %b.sext = sext <vscale x 16 x i8 > %b to <vscale x 16 x i16 >
@@ -33,9 +30,9 @@ define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
3330; CHECK-NEXT: vmerge.vim v12, v10, -1, v0
3431; CHECK-NEXT: vmv1r.v v0, v8
3532; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
36- ; CHECK-NEXT: vsub.vv v8, v12, v8
37- ; CHECK-NEXT: vrsub.vi v10, v8, 0
38- ; CHECK-NEXT: vmax.vv v8, v8, v10
33+ ; CHECK-NEXT: vmin.vv v10, v12, v8
34+ ; CHECK-NEXT: vmax.vv v8, v12, v8
35+ ; CHECK-NEXT: vsub.vv v8, v8, v10
3936; CHECK-NEXT: ret
4037 %a.sext = sext <vscale x 16 x i1 > %a to <vscale x 16 x i8 >
4138 %b.sext = sext <vscale x 16 x i1 > %b to <vscale x 16 x i8 >
@@ -48,12 +45,9 @@ define <vscale x 8 x i16> @sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
4845; CHECK-LABEL: sabd_h:
4946; CHECK: # %bb.0:
5047; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
51- ; CHECK-NEXT: vwsub.vv v12, v8, v10
52- ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
53- ; CHECK-NEXT: vrsub.vi v8, v12, 0
54- ; CHECK-NEXT: vmax.vv v12, v12, v8
55- ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
56- ; CHECK-NEXT: vnsrl.wi v8, v12, 0
48+ ; CHECK-NEXT: vmin.vv v12, v8, v10
49+ ; CHECK-NEXT: vmax.vv v8, v8, v10
50+ ; CHECK-NEXT: vsub.vv v8, v8, v12
5751; CHECK-NEXT: ret
5852 %a.sext = sext <vscale x 8 x i16 > %a to <vscale x 8 x i32 >
5953 %b.sext = sext <vscale x 8 x i16 > %b to <vscale x 8 x i32 >
@@ -67,10 +61,11 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
6761; CHECK-LABEL: sabd_h_promoted_ops:
6862; CHECK: # %bb.0:
6963; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
70- ; CHECK-NEXT: vwsub.vv v10, v8, v9
64+ ; CHECK-NEXT: vmin.vv v10, v8, v9
65+ ; CHECK-NEXT: vmax.vv v8, v8, v9
66+ ; CHECK-NEXT: vsub.vv v10, v8, v10
7167; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
72- ; CHECK-NEXT: vrsub.vi v8, v10, 0
73- ; CHECK-NEXT: vmax.vv v8, v10, v8
68+ ; CHECK-NEXT: vzext.vf2 v8, v10
7469; CHECK-NEXT: ret
7570 %a.sext = sext <vscale x 8 x i8 > %a to <vscale x 8 x i16 >
7671 %b.sext = sext <vscale x 8 x i8 > %b to <vscale x 8 x i16 >
@@ -83,12 +78,9 @@ define <vscale x 4 x i32> @sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
8378; CHECK-LABEL: sabd_s:
8479; CHECK: # %bb.0:
8580; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
86- ; CHECK-NEXT: vwsub.vv v12, v8, v10
87- ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
88- ; CHECK-NEXT: vrsub.vi v8, v12, 0
89- ; CHECK-NEXT: vmax.vv v12, v12, v8
90- ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
91- ; CHECK-NEXT: vnsrl.wi v8, v12, 0
81+ ; CHECK-NEXT: vmin.vv v12, v8, v10
82+ ; CHECK-NEXT: vmax.vv v8, v8, v10
83+ ; CHECK-NEXT: vsub.vv v8, v8, v12
9284; CHECK-NEXT: ret
9385 %a.sext = sext <vscale x 4 x i32 > %a to <vscale x 4 x i64 >
9486 %b.sext = sext <vscale x 4 x i32 > %b to <vscale x 4 x i64 >
@@ -102,10 +94,11 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
10294; CHECK-LABEL: sabd_s_promoted_ops:
10395; CHECK: # %bb.0:
10496; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
105- ; CHECK-NEXT: vwsub.vv v10, v8, v9
97+ ; CHECK-NEXT: vmin.vv v10, v8, v9
98+ ; CHECK-NEXT: vmax.vv v8, v8, v9
99+ ; CHECK-NEXT: vsub.vv v10, v8, v10
106100; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
107- ; CHECK-NEXT: vrsub.vi v8, v10, 0
108- ; CHECK-NEXT: vmax.vv v8, v10, v8
101+ ; CHECK-NEXT: vzext.vf2 v8, v10
109102; CHECK-NEXT: ret
110103 %a.sext = sext <vscale x 4 x i16 > %a to <vscale x 4 x i32 >
111104 %b.sext = sext <vscale x 4 x i16 > %b to <vscale x 4 x i32 >
@@ -128,10 +121,11 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
128121; CHECK-LABEL: sabd_d_promoted_ops:
129122; CHECK: # %bb.0:
130123; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
131- ; CHECK-NEXT: vwsub.vv v10, v8, v9
124+ ; CHECK-NEXT: vmin.vv v10, v8, v9
125+ ; CHECK-NEXT: vmax.vv v8, v8, v9
126+ ; CHECK-NEXT: vsub.vv v10, v8, v10
132127; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
133- ; CHECK-NEXT: vrsub.vi v8, v10, 0
134- ; CHECK-NEXT: vmax.vv v8, v10, v8
128+ ; CHECK-NEXT: vzext.vf2 v8, v10
135129; CHECK-NEXT: ret
136130 %a.sext = sext <vscale x 2 x i32 > %a to <vscale x 2 x i64 >
137131 %b.sext = sext <vscale x 2 x i32 > %b to <vscale x 2 x i64 >
@@ -148,12 +142,9 @@ define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
148142; CHECK-LABEL: uabd_b:
149143; CHECK: # %bb.0:
150144; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
151- ; CHECK-NEXT: vwsubu.vv v12, v8, v10
152- ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
153- ; CHECK-NEXT: vrsub.vi v8, v12, 0
154- ; CHECK-NEXT: vmax.vv v12, v12, v8
155- ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
156- ; CHECK-NEXT: vnsrl.wi v8, v12, 0
145+ ; CHECK-NEXT: vminu.vv v12, v8, v10
146+ ; CHECK-NEXT: vmaxu.vv v8, v8, v10
147+ ; CHECK-NEXT: vsub.vv v8, v8, v12
157148; CHECK-NEXT: ret
158149 %a.zext = zext <vscale x 16 x i8 > %a to <vscale x 16 x i16 >
159150 %b.zext = zext <vscale x 16 x i8 > %b to <vscale x 16 x i16 >
@@ -171,9 +162,9 @@ define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
171162; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
172163; CHECK-NEXT: vmv1r.v v0, v8
173164; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
174- ; CHECK-NEXT: vsub.vv v8, v12, v8
175- ; CHECK-NEXT: vrsub.vi v10, v8, 0
176- ; CHECK-NEXT: vmax.vv v8, v8, v10
165+ ; CHECK-NEXT: vminu.vv v10, v12, v8
166+ ; CHECK-NEXT: vmaxu.vv v8, v12, v8
167+ ; CHECK-NEXT: vsub.vv v8, v8, v10
177168; CHECK-NEXT: ret
178169 %a.zext = zext <vscale x 16 x i1 > %a to <vscale x 16 x i8 >
179170 %b.zext = zext <vscale x 16 x i1 > %b to <vscale x 16 x i8 >
@@ -186,12 +177,9 @@ define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
186177; CHECK-LABEL: uabd_h:
187178; CHECK: # %bb.0:
188179; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
189- ; CHECK-NEXT: vwsubu.vv v12, v8, v10
190- ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
191- ; CHECK-NEXT: vrsub.vi v8, v12, 0
192- ; CHECK-NEXT: vmax.vv v12, v12, v8
193- ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
194- ; CHECK-NEXT: vnsrl.wi v8, v12, 0
180+ ; CHECK-NEXT: vminu.vv v12, v8, v10
181+ ; CHECK-NEXT: vmaxu.vv v8, v8, v10
182+ ; CHECK-NEXT: vsub.vv v8, v8, v12
195183; CHECK-NEXT: ret
196184 %a.zext = zext <vscale x 8 x i16 > %a to <vscale x 8 x i32 >
197185 %b.zext = zext <vscale x 8 x i16 > %b to <vscale x 8 x i32 >
@@ -205,10 +193,11 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
205193; CHECK-LABEL: uabd_h_promoted_ops:
206194; CHECK: # %bb.0:
207195; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
208- ; CHECK-NEXT: vwsubu.vv v10, v8, v9
196+ ; CHECK-NEXT: vminu.vv v10, v8, v9
197+ ; CHECK-NEXT: vmaxu.vv v8, v8, v9
198+ ; CHECK-NEXT: vsub.vv v10, v8, v10
209199; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
210- ; CHECK-NEXT: vrsub.vi v8, v10, 0
211- ; CHECK-NEXT: vmax.vv v8, v10, v8
200+ ; CHECK-NEXT: vzext.vf2 v8, v10
212201; CHECK-NEXT: ret
213202 %a.zext = zext <vscale x 8 x i8 > %a to <vscale x 8 x i16 >
214203 %b.zext = zext <vscale x 8 x i8 > %b to <vscale x 8 x i16 >
@@ -221,12 +210,9 @@ define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
221210; CHECK-LABEL: uabd_s:
222211; CHECK: # %bb.0:
223212; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
224- ; CHECK-NEXT: vwsubu.vv v12, v8, v10
225- ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
226- ; CHECK-NEXT: vrsub.vi v8, v12, 0
227- ; CHECK-NEXT: vmax.vv v12, v12, v8
228- ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
229- ; CHECK-NEXT: vnsrl.wi v8, v12, 0
213+ ; CHECK-NEXT: vminu.vv v12, v8, v10
214+ ; CHECK-NEXT: vmaxu.vv v8, v8, v10
215+ ; CHECK-NEXT: vsub.vv v8, v8, v12
230216; CHECK-NEXT: ret
231217 %a.zext = zext <vscale x 4 x i32 > %a to <vscale x 4 x i64 >
232218 %b.zext = zext <vscale x 4 x i32 > %b to <vscale x 4 x i64 >
@@ -240,10 +226,11 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
240226; CHECK-LABEL: uabd_s_promoted_ops:
241227; CHECK: # %bb.0:
242228; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
243- ; CHECK-NEXT: vwsubu.vv v10, v8, v9
229+ ; CHECK-NEXT: vminu.vv v10, v8, v9
230+ ; CHECK-NEXT: vmaxu.vv v8, v8, v9
231+ ; CHECK-NEXT: vsub.vv v10, v8, v10
244232; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
245- ; CHECK-NEXT: vrsub.vi v8, v10, 0
246- ; CHECK-NEXT: vmax.vv v8, v10, v8
233+ ; CHECK-NEXT: vzext.vf2 v8, v10
247234; CHECK-NEXT: ret
248235 %a.zext = zext <vscale x 4 x i16 > %a to <vscale x 4 x i32 >
249236 %b.zext = zext <vscale x 4 x i16 > %b to <vscale x 4 x i32 >
@@ -266,10 +253,11 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
266253; CHECK-LABEL: uabd_d_promoted_ops:
267254; CHECK: # %bb.0:
268255; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
269- ; CHECK-NEXT: vwsubu.vv v10, v8, v9
256+ ; CHECK-NEXT: vminu.vv v10, v8, v9
257+ ; CHECK-NEXT: vmaxu.vv v8, v8, v9
258+ ; CHECK-NEXT: vsub.vv v10, v8, v10
270259; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
271- ; CHECK-NEXT: vrsub.vi v8, v10, 0
272- ; CHECK-NEXT: vmax.vv v8, v10, v8
260+ ; CHECK-NEXT: vzext.vf2 v8, v10
273261; CHECK-NEXT: ret
274262 %a.zext = zext <vscale x 2 x i32 > %a to <vscale x 2 x i64 >
275263 %b.zext = zext <vscale x 2 x i32 > %b to <vscale x 2 x i64 >
@@ -285,12 +273,9 @@ define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <v
285273; CHECK: # %bb.0:
286274; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
287275; CHECK-NEXT: vzext.vf4 v12, v10
288- ; CHECK-NEXT: vwsubu.vv v16, v8, v12
289- ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
290- ; CHECK-NEXT: vrsub.vi v8, v16, 0
291- ; CHECK-NEXT: vmax.vv v12, v16, v8
292- ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
293- ; CHECK-NEXT: vnsrl.wi v8, v12, 0
276+ ; CHECK-NEXT: vminu.vv v10, v8, v12
277+ ; CHECK-NEXT: vmaxu.vv v8, v8, v12
278+ ; CHECK-NEXT: vsub.vv v8, v8, v10
294279; CHECK-NEXT: ret
295280 %a.zext = zext <vscale x 4 x i32 > %a to <vscale x 4 x i64 >
296281 %b.zext = zext <vscale x 4 x i8 > %b to <vscale x 4 x i64 >
@@ -307,10 +292,11 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
307292; CHECK: # %bb.0:
308293; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
309294; CHECK-NEXT: vzext.vf2 v10, v8
310- ; CHECK-NEXT: vwsubu.vv v12, v10, v9
295+ ; CHECK-NEXT: vminu.vv v8, v10, v9
296+ ; CHECK-NEXT: vmaxu.vv v9, v10, v9
297+ ; CHECK-NEXT: vsub.vv v10, v9, v8
311298; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
312- ; CHECK-NEXT: vrsub.vi v8, v12, 0
313- ; CHECK-NEXT: vmax.vv v8, v12, v8
299+ ; CHECK-NEXT: vzext.vf2 v8, v10
314300; CHECK-NEXT: ret
315301 %a.zext = zext <vscale x 4 x i8 > %a to <vscale x 4 x i32 >
316302 %b.zext = zext <vscale x 4 x i16 > %b to <vscale x 4 x i32 >