1- ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
1+ ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX906 %s
2+ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX900 %s
23; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
34; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
45
56; GCN-LABEL: mixlo_simple:
67; GCN: s_waitcnt
7- ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
8+ ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
9+ ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2{{$}}
810; GFX9-NEXT: s_setpc_b64
911
1012; CIVI: v_mac_f32_e32
@@ -16,7 +18,8 @@ define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
1618}
1719
1820; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f16lo:
19- ; GFX9: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
21+ ; GFX900: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
22+ ; GFX906: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
2023; CI: v_mac_f32
2124; CIVI: v_cvt_f16_f32
2225define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo (half %src0 , half %src1 , half %src2 ) #0 {
@@ -30,7 +33,8 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src
3033
3134; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32:
3235; GCN: s_waitcnt
33- ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
36+ ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
37+ ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
3438; GFX9-NEXT: s_setpc_b64
3539
3640; CIVI: v_mac_f32
@@ -44,7 +48,8 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2
4448
4549; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
4650; GCN: s_waitcnt
47- ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
51+ ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
52+ ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
4853; GFX9-NEXT: s_setpc_b64
4954
5055; CIVI: v_mac_f32_e32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]$}}
@@ -60,7 +65,8 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr
6065
6166; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
6267; GCN: s_waitcnt
63- ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
68+ ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
69+ ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
6470; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
6571; GFX9-NEXT: s_setpc_b64
6672
@@ -79,8 +85,13 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src
7985; operation only clobbers relevant lane.
8086; GCN-LABEL: {{^}}v_mad_mix_v2f32:
8187; GCN: s_waitcnt
82- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
83- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
88+
89+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
90+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
91+
92+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
93+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
94+
8495; GFX9-NEXT: v_mov_b32_e32 v0, v3
8596; GFX9-NEXT: s_setpc_b64
8697define <2 x half > @v_mad_mix_v2f32 (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -94,9 +105,14 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half
94105
95106; GCN-LABEL: {{^}}v_mad_mix_v3f32:
96107; GCN: s_waitcnt
97- ; GFX9-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
98- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
99- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
108+ ; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
109+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
110+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
111+
112+ ; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
113+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
114+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
115+
100116; GFX9-NEXT: v_mov_b32_e32 v0, v3
101117; GFX9-NEXT: s_setpc_b64
102118define <3 x half > @v_mad_mix_v3f32 (<3 x half > %src0 , <3 x half > %src1 , <3 x half > %src2 ) #0 {
@@ -110,10 +126,16 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half
110126
111127; GCN-LABEL: {{^}}v_mad_mix_v4f32:
112128; GCN: s_waitcnt
113- ; GFX9-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
114- ; GFX9-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
115- ; GFX9-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
116- ; GFX9-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
129+ ; GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
130+ ; GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
131+ ; GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
132+ ; GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
133+
134+ ; GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
135+ ; GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
136+ ; GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
137+ ; GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
138+
117139; GFX9-NEXT: v_mov_b32_e32 v0, v7
118140; GFX9-NEXT: v_mov_b32_e32 v1, v6
119141; GFX9-NEXT: s_setpc_b64
@@ -128,8 +150,12 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
128150
129151; FIXME: Fold clamp
130152; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt:
131- ; GFX9: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
132- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
153+ ; GFX900: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
154+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
155+
156+ ; GFX906: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
157+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
158+
133159; GFX9-NEXT: v_mov_b32_e32 v0, v3
134160; GFX9-NEXT: s_setpc_b64
135161define <2 x half > @v_mad_mix_v2f32_clamp_postcvt (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -146,9 +172,15 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s
146172; FIXME: Should be packed into 2 registers per argument?
147173; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt:
148174; GCN: s_waitcnt
149- ; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp
150- ; GFX9-DAG: v_mad_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
151- ; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1]
175+ ; GFX900-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp
176+ ; GFX900-DAG: v_mad_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
177+ ; GFX900-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1]
178+
179+ ; GFX906-DAG: v_fma_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp
180+ ; GFX906-DAG: v_fma_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
181+ ; GFX906-DAG: v_fma_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1]
182+
183+
152184; GFX9-DAG: v_pk_max_f16 v1, v1, v1 clamp
153185; GFX9: v_mov_b32_e32 v0, v{{[0-9]+}}
154186; GFX9-NEXT: s_setpc_b64
@@ -165,10 +197,18 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
165197
166198; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt:
167199; GCN: s_waitcnt
168- ; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
169- ; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
170- ; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
171- ; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
200+ ; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
201+ ; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
202+ ; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
203+ ; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
204+
205+
206+ ; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
207+ ; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
208+ ; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
209+ ; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
210+
211+
172212; GFX9-NEXT: v_mov_b32_e32 v0, v6
173213; GFX9-NEXT: v_mov_b32_e32 v1, v2
174214; GFX9-NEXT: s_setpc_b64
@@ -185,8 +225,12 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
185225
186226; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_lo:
187227; GCN: s_waitcnt
188- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
189- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
228+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
229+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
230+
231+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
232+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
233+
190234; GFX9-NEXT: v_mov_b32_e32 v0, v3
191235; GFX9-NEXT: s_setpc_b64
192236define <2 x half > @v_mad_mix_v2f32_clamp_postcvt_lo (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -204,8 +248,12 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
204248
205249; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_hi:
206250; GCN: s_waitcnt
207- ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
208- ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
251+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
252+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
253+
254+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
255+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
256+
209257; GFX9-NEXT: v_mov_b32_e32 v0, v3
210258; GFX9-NEXT: s_setpc_b64
211259define <2 x half > @v_mad_mix_v2f32_clamp_postcvt_hi (<2 x half > %src0 , <2 x half > %src1 , <2 x half > %src2 ) #0 {
@@ -223,8 +271,12 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
223271
224272; FIXME: Should be able to use mixlo/mixhi
225273; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_precvt:
226- ; GFX9: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
227- ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
274+ ; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
275+ ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
276+
277+ ; GFX906: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
278+ ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
279+
228280; GFX9: v_cvt_f16_f32_e32 v1, v3
229281; GFX9: v_cvt_f16_f32_e32 v0, v0
230282; GFX9: v_pack_b32_f16 v0, v0, v1
@@ -243,9 +295,15 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
243295; FIXME: Handling undef 4th component
244296; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_precvt:
245297; GCN: s_waitcnt
246- ; GFX9-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
247- ; GFX9-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
248- ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
298+ ; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
299+ ; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
300+ ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
301+
302+ ; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
303+ ; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
304+ ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
305+
306+
249307; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v3
250308; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
251309; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -263,10 +321,16 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
263321}
264322
265323; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_precvt:
266- ; GFX9: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
267- ; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
268- ; GFX9: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
269- ; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
324+ ; GFX900: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
325+ ; GFX900: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
326+ ; GFX900: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
327+ ; GFX900: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
328+
329+
330+ ; GFX906: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
331+ ; GFX906: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
332+ ; GFX906: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
333+ ; GFX906: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
270334
271335; GFX9: v_cvt_f16_f32
272336; GFX9: v_cvt_f16_f32
0 commit comments