; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s
33
4- define arm_aapcs_vfpcc <8 x half > @test_vaddq_f16 (<8 x half > %a , <8 x half > %b ) {
4+ define arm_aapcs_vfpcc <8 x half > @test_vaddq_f16 (<8 x half > %a , <8 x half > %b ) # 0 {
55; CHECK-LABEL: test_vaddq_f16:
66; CHECK: @ %bb.0: @ %entry
77; CHECK-NEXT: vadd.f16 q0, q0, q1
1111 ret <8 x half > %0
1212}
1313
14- define arm_aapcs_vfpcc <4 x float > @test_vaddq_f32 (<4 x float > %a , <4 x float > %b ) {
14+ define arm_aapcs_vfpcc <4 x float > @test_vaddq_f32 (<4 x float > %a , <4 x float > %b ) # 0 {
1515; CHECK-LABEL: test_vaddq_f32:
1616; CHECK: @ %bb.0: @ %entry
1717; CHECK-NEXT: vadd.f32 q0, q0, q1
2121 ret <4 x float > %0
2222}
2323
24- define arm_aapcs_vfpcc <8 x half > @test_vsubq_f16 (<8 x half > %a , <8 x half > %b ) {
24+ define arm_aapcs_vfpcc <8 x half > @test_vsubq_f16 (<8 x half > %a , <8 x half > %b ) # 0 {
2525; CHECK-LABEL: test_vsubq_f16:
2626; CHECK: @ %bb.0: @ %entry
2727; CHECK-NEXT: vsub.f16 q0, q0, q1
3131 ret <8 x half > %0
3232}
3333
34- define arm_aapcs_vfpcc <4 x float > @test_vsubq_f32 (<4 x float > %a , <4 x float > %b ) {
34+ define arm_aapcs_vfpcc <4 x float > @test_vsubq_f32 (<4 x float > %a , <4 x float > %b ) # 0 {
3535; CHECK-LABEL: test_vsubq_f32:
3636; CHECK: @ %bb.0: @ %entry
3737; CHECK-NEXT: vsub.f32 q0, q0, q1
4141 ret <4 x float > %0
4242}
4343
44- define arm_aapcs_vfpcc <8 x half > @test_vmulq_f16 (<8 x half > %a , <8 x half > %b ) {
44+ define arm_aapcs_vfpcc <8 x half > @test_vmulq_f16 (<8 x half > %a , <8 x half > %b ) # 0 {
4545; CHECK-LABEL: test_vmulq_f16:
4646; CHECK: @ %bb.0: @ %entry
4747; CHECK-NEXT: vmul.f16 q0, q0, q1
5151 ret <8 x half > %0
5252}
5353
54- define arm_aapcs_vfpcc <4 x float > @test_vmulq_f32 (<4 x float > %a , <4 x float > %b ) {
54+ define arm_aapcs_vfpcc <4 x float > @test_vmulq_f32 (<4 x float > %a , <4 x float > %b ) # 0 {
5555; CHECK-LABEL: test_vmulq_f32:
5656; CHECK: @ %bb.0: @ %entry
5757; CHECK-NEXT: vmul.f32 q0, q0, q1
6464
6565
6666
67- define arm_aapcs_vfpcc <8 x half > @test_vaddq_f16_splat (<8 x half > %a , half %b ) {
67+ define arm_aapcs_vfpcc <8 x half > @test_vaddq_f16_splat (<8 x half > %a , half %b ) # 0 {
6868; CHECK-LABEL: test_vaddq_f16_splat:
6969; CHECK: @ %bb.0: @ %entry
7070; CHECK-NEXT: vmov.f16 r0, s4
7777 ret <8 x half > %0
7878}
7979
80- define arm_aapcs_vfpcc <4 x float > @test_vaddq_f32_splat (<4 x float > %a , float %b ) {
80+ define arm_aapcs_vfpcc <4 x float > @test_vaddq_f32_splat (<4 x float > %a , float %b ) # 0 {
8181; CHECK-LABEL: test_vaddq_f32_splat:
8282; CHECK: @ %bb.0: @ %entry
8383; CHECK-NEXT: vmov r0, s4
9090 ret <4 x float > %0
9191}
9292
93- define arm_aapcs_vfpcc <8 x half > @test_vsubq_f16_splat (<8 x half > %a , half %b ) {
93+ define arm_aapcs_vfpcc <8 x half > @test_vsubq_f16_splat (<8 x half > %a , half %b ) # 0 {
9494; CHECK-LABEL: test_vsubq_f16_splat:
9595; CHECK: @ %bb.0: @ %entry
9696; CHECK-NEXT: vmov.f16 r0, s4
@@ -103,7 +103,7 @@ entry:
103103 ret <8 x half > %0
104104}
105105
106- define arm_aapcs_vfpcc <4 x float > @test_vsubq_f32_splat (<4 x float > %a , float %b ) {
106+ define arm_aapcs_vfpcc <4 x float > @test_vsubq_f32_splat (<4 x float > %a , float %b ) # 0 {
107107; CHECK-LABEL: test_vsubq_f32_splat:
108108; CHECK: @ %bb.0: @ %entry
109109; CHECK-NEXT: vmov r0, s4
@@ -116,7 +116,7 @@ entry:
116116 ret <4 x float > %0
117117}
118118
119- define arm_aapcs_vfpcc <8 x half > @test_vmulq_f16_splat (<8 x half > %a , half %b ) {
119+ define arm_aapcs_vfpcc <8 x half > @test_vmulq_f16_splat (<8 x half > %a , half %b ) # 0 {
120120; CHECK-LABEL: test_vmulq_f16_splat:
121121; CHECK: @ %bb.0: @ %entry
122122; CHECK-NEXT: vmov.f16 r0, s4
@@ -129,7 +129,7 @@ entry:
129129 ret <8 x half > %0
130130}
131131
132- define arm_aapcs_vfpcc <4 x float > @test_vmulq_f32_splat (<4 x float > %a , float %b ) {
132+ define arm_aapcs_vfpcc <4 x float > @test_vmulq_f32_splat (<4 x float > %a , float %b ) # 0 {
133133; CHECK-LABEL: test_vmulq_f32_splat:
134134; CHECK: @ %bb.0: @ %entry
135135; CHECK-NEXT: vmov r0, s4
@@ -141,3 +141,103 @@ entry:
141141 %0 = tail call <4 x float > @llvm.arm.mve.vmul.v4f32 (<4 x float > %a , <4 x float > %s )
142142 ret <4 x float > %0
143143}
144+
; Vector fused multiply-add on <4 x float> in a strictfp function.
; Operands (%s1, %s2, %dst) are forwarded to the MVE fma intrinsic; the
; assertions below expect exactly `vfma.f32 q0, q1, q2` followed by `bx lr`.
define arm_aapcs_vfpcc <4 x float> @fma_v4f32(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) #0 {
; CHECK-LABEL: fma_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %fma = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %s1, <4 x float> %s2, <4 x float> %dst)
  ret <4 x float> %fma
}
154+
; Vector fused multiply-add on <8 x half> in a strictfp function.
; Operands (%s1, %s2, %dst) are forwarded to the MVE fma intrinsic; the
; assertions below expect exactly `vfma.f16 q0, q1, q2` followed by `bx lr`.
define arm_aapcs_vfpcc <8 x half> @fma_v8f16(<8 x half> %dst, <8 x half> %s1, <8 x half> %s2) #0 {
; CHECK-LABEL: fma_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %fma = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %s1, <8 x half> %s2, <8 x half> %dst)
  ret <8 x half> %fma
}
164+
; Fused multiply-add with a broadcast scalar multiplicand on <4 x float>.
; %s3 is splatted via insertelement + shufflevector (zero mask) and the
; intrinsic receives (%s2, splat, %s1). The assertions below expect the scalar
; to be moved into r0 and consumed directly by `vfma.f32 q0, q1, r0`.
; NOTE(review): the name says v8f16 but every operand is <4 x float>; it looks
; swapped with @fma_n_v4f32. Name kept so the FileCheck label stays stable.
define arm_aapcs_vfpcc <4 x float> @fma_n_v8f16(<4 x float> %s1, <4 x float> %s2, float %s3) #0 {
; CHECK-LABEL: fma_n_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vfma.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <4 x float> poison, float %s3, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
  %fma = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %s2, <4 x float> %splat, <4 x float> %s1)
  ret <4 x float> %fma
}
177+
; Fused multiply-add with a broadcast scalar multiplicand on <8 x half>.
; %s3 is splatted via insertelement + shufflevector (zero mask) and the
; intrinsic receives (%s2, splat, %s1). The assertions below expect the scalar
; to be moved into r0 and consumed directly by `vfma.f16 q0, q1, r0`.
; NOTE(review): the name says v4f32 but every operand is <8 x half>; it looks
; swapped with @fma_n_v8f16. Name kept so the FileCheck label stays stable.
define arm_aapcs_vfpcc <8 x half> @fma_n_v4f32(<8 x half> %s1, <8 x half> %s2, half %s3) #0 {
; CHECK-LABEL: fma_n_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vfma.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <8 x half> poison, half %s3, i32 0
  %splat = shufflevector <8 x half> %ins, <8 x half> poison, <8 x i32> zeroinitializer
  %fma = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %s2, <8 x half> %splat, <8 x half> %s1)
  ret <8 x half> %fma
}
190+
; Fused multiply-subtract on <4 x float> in a strictfp function.
; %s1 is negated with fneg before being passed as the first multiplicand of
; the MVE fma intrinsic; the assertions below expect this to be selected as a
; single `vfms.f32 q0, q1, q2` followed by `bx lr`.
define arm_aapcs_vfpcc <4 x float> @fms_v4f32(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) #0 {
; CHECK-LABEL: fms_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfms.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %neg = fneg <4 x float> %s1
  %fms = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %neg, <4 x float> %s2, <4 x float> %dst)
  ret <4 x float> %fms
}
201+
; Fused multiply-subtract on <8 x half> in a strictfp function.
; %s1 is negated with fneg before being passed as the first multiplicand of
; the MVE fma intrinsic; the assertions below expect this to be selected as a
; single `vfms.f16 q0, q1, q2` followed by `bx lr`.
define arm_aapcs_vfpcc <8 x half> @fms_v8f16(<8 x half> %dst, <8 x half> %s1, <8 x half> %s2) #0 {
; CHECK-LABEL: fms_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfms.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %neg = fneg <8 x half> %s1
  %fms = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %neg, <8 x half> %s2, <8 x half> %dst)
  ret <8 x half> %fms
}
212+
; Fused multiply-subtract with a broadcast scalar multiplicand on <4 x float>.
; %s2 is negated with fneg, %s3 is splatted via insertelement + shufflevector
; (zero mask), and the intrinsic receives (neg, splat, %s1). The assertions
; below expect the splat to be materialised with `vdup.32 q2, r0` and fed to
; the vector form `vfms.f32 q0, q1, q2`, not a scalar-register operand.
; NOTE(review): the name says v8f16 but every operand is <4 x float>; it looks
; swapped with @fms_n_v4f32. Name kept so the FileCheck label stays stable.
define arm_aapcs_vfpcc <4 x float> @fms_n_v8f16(<4 x float> %s1, <4 x float> %s2, float %s3) #0 {
; CHECK-LABEL: fms_n_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vdup.32 q2, r0
; CHECK-NEXT:    vfms.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %neg = fneg <4 x float> %s2
  %ins = insertelement <4 x float> poison, float %s3, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
  %fms = tail call <4 x float> @llvm.arm.mve.fma.v4f32(<4 x float> %neg, <4 x float> %splat, <4 x float> %s1)
  ret <4 x float> %fms
}
227+
; Fused multiply-subtract with a broadcast scalar multiplicand on <8 x half>.
; %s2 is negated with fneg, %s3 is splatted via insertelement + shufflevector
; (zero mask), and the intrinsic receives (neg, splat, %s1). The assertions
; below expect the splat to be materialised with `vdup.16 q2, r0` and fed to
; the vector form `vfms.f16 q0, q1, q2`, not a scalar-register operand.
; NOTE(review): the name says v4f32 but every operand is <8 x half>; it looks
; swapped with @fms_n_v8f16. Name kept so the FileCheck label stays stable.
define arm_aapcs_vfpcc <8 x half> @fms_n_v4f32(<8 x half> %s1, <8 x half> %s2, half %s3) #0 {
; CHECK-LABEL: fms_n_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vfms.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %neg = fneg <8 x half> %s2
  %ins = insertelement <8 x half> poison, half %s3, i32 0
  %splat = shufflevector <8 x half> %ins, <8 x half> poison, <8 x i32> zeroinitializer
  %fms = tail call <8 x half> @llvm.arm.mve.fma.v8f16(<8 x half> %neg, <8 x half> %splat, <8 x half> %s1)
  ret <8 x half> %fms
}
242+
; Attribute group #0: every function in this file that references it is
; marked strictfp.
attributes #0 = { strictfp }
0 commit comments