@@ -153,7 +153,11 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
153153 | "__builtin_ia32_psrav16hi_mask"
154154 | "__builtin_ia32_psrav8hi_mask"
155155 | "__builtin_ia32_permvarhi256_mask"
156- | "__builtin_ia32_permvarhi128_mask" => {
156+ | "__builtin_ia32_permvarhi128_mask"
157+ | "__builtin_ia32_maxph128_mask"
158+ | "__builtin_ia32_maxph256_mask"
159+ | "__builtin_ia32_minph128_mask"
160+ | "__builtin_ia32_minph256_mask" => {
157161 let mut new_args = args. to_vec ( ) ;
158162 let arg3_type = gcc_func. get_param_type ( 2 ) ;
159163 let vector_type = arg3_type. dyncast_vector ( ) . expect ( "vector type" ) ;
@@ -194,7 +198,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
194198 | "__builtin_ia32_cvtqq2ps256_mask"
195199 | "__builtin_ia32_cvtuqq2pd128_mask"
196200 | "__builtin_ia32_cvtuqq2pd256_mask"
197- | "__builtin_ia32_cvtuqq2ps256_mask" => {
201+ | "__builtin_ia32_cvtuqq2ps256_mask"
202+ | "__builtin_ia32_vcvtw2ph128_mask"
203+ | "__builtin_ia32_vcvtw2ph256_mask"
204+ | "__builtin_ia32_vcvtuw2ph128_mask"
205+ | "__builtin_ia32_vcvtuw2ph256_mask"
206+ | "__builtin_ia32_vcvtdq2ph256_mask"
207+ | "__builtin_ia32_vcvtudq2ph256_mask" => {
198208 let mut new_args = args. to_vec ( ) ;
199209 // Remove last arg as it doesn't seem to be used in GCC and is always false.
200210 new_args. pop ( ) ;
@@ -296,7 +306,8 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
296306 "__builtin_ia32_vfmaddsubps512_mask"
297307 | "__builtin_ia32_vfmaddsubpd512_mask"
298308 | "__builtin_ia32_cmpsh_mask_round"
299- | "__builtin_ia32_vfmaddph512_mask" => {
309+ | "__builtin_ia32_vfmaddph512_mask"
310+ | "__builtin_ia32_vfmaddsubph512_mask" => {
300311 let mut new_args = args. to_vec ( ) ;
301312 let last_arg = new_args. pop ( ) . expect ( "last arg" ) ;
302313 let arg4_type = gcc_func. get_param_type ( 3 ) ;
@@ -319,9 +330,6 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
319330 | "__builtin_ia32_vpermi2varpd128_mask"
320331 | "__builtin_ia32_vpmadd52huq512_mask"
321332 | "__builtin_ia32_vpmadd52luq512_mask"
322- | "__builtin_ia32_vpmadd52huq256_mask"
323- | "__builtin_ia32_vpmadd52luq256_mask"
324- | "__builtin_ia32_vpmadd52huq128_mask"
325333 | "__builtin_ia32_vfmaddsubph128_mask"
326334 | "__builtin_ia32_vfmaddsubph256_mask" => {
327335 let mut new_args = args. to_vec ( ) ;
@@ -405,7 +413,14 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
405413 "__builtin_ia32_cvtqq2pd512_mask"
406414 | "__builtin_ia32_cvtqq2ps512_mask"
407415 | "__builtin_ia32_cvtuqq2pd512_mask"
408- | "__builtin_ia32_cvtuqq2ps512_mask" => {
416+ | "__builtin_ia32_cvtuqq2ps512_mask"
417+ | "__builtin_ia32_sqrtph512_mask_round"
418+ | "__builtin_ia32_vcvtw2ph512_mask_round"
419+ | "__builtin_ia32_vcvtuw2ph512_mask_round"
420+ | "__builtin_ia32_vcvtdq2ph512_mask_round"
421+ | "__builtin_ia32_vcvtudq2ph512_mask_round"
422+ | "__builtin_ia32_vcvtqq2ph512_mask_round"
423+ | "__builtin_ia32_vcvtuqq2ph512_mask_round" => {
409424 let mut old_args = args. to_vec ( ) ;
410425 let mut new_args = vec ! [ ] ;
411426 new_args. push ( old_args. swap_remove ( 0 ) ) ;
@@ -425,7 +440,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
425440 "__builtin_ia32_addph512_mask_round"
426441 | "__builtin_ia32_subph512_mask_round"
427442 | "__builtin_ia32_mulph512_mask_round"
428- | "__builtin_ia32_divph512_mask_round" => {
443+ | "__builtin_ia32_divph512_mask_round"
444+ | "__builtin_ia32_maxph512_mask_round"
445+ | "__builtin_ia32_minph512_mask_round" => {
429446 let mut new_args = args. to_vec ( ) ;
430447 let last_arg = new_args. pop ( ) . expect ( "last arg" ) ;
431448
@@ -460,7 +477,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
460477 }
461478 } else {
462479 match func_name {
463- "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
480+ "__builtin_ia32_rndscaless_mask_round"
481+ | "__builtin_ia32_rndscalesd_mask_round"
482+ | "__builtin_ia32_reducesh_mask_round" => {
464483 let new_args = args. to_vec ( ) ;
465484 let arg3_type = gcc_func. get_param_type ( 2 ) ;
466485 let arg3 = builder. context . new_cast ( None , new_args[ 4 ] , arg3_type) ;
@@ -585,6 +604,12 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
585604 new_args[ 2 ] = builder. context . new_cast ( None , new_args[ 2 ] , builder. double_type ) ;
586605 args = new_args. into ( ) ;
587606 }
607+ "__builtin_ia32_sqrtsh_mask_round" => {
608+ // The first two arguments are inverted, so swap them.
609+ let mut new_args = args. to_vec ( ) ;
610+ new_args. swap ( 0 , 1 ) ;
611+ args = new_args. into ( ) ;
612+ }
588613 _ => ( ) ,
589614 }
590615 }
@@ -1090,9 +1115,9 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
10901115 "llvm.x86.avx512.dbpsadbw.128" => "__builtin_ia32_dbpsadbw128_mask" ,
10911116 "llvm.x86.avx512.vpmadd52h.uq.512" => "__builtin_ia32_vpmadd52huq512_mask" ,
10921117 "llvm.x86.avx512.vpmadd52l.uq.512" => "__builtin_ia32_vpmadd52luq512_mask" ,
1093- "llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256_mask " ,
1094- "llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256_mask " ,
1095- "llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128_mask " ,
1118+ "llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256 " ,
1119+ "llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256 " ,
1120+ "llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128 " ,
10961121 "llvm.x86.avx512.vpdpwssd.512" => "__builtin_ia32_vpdpwssd_v16si" ,
10971122 "llvm.x86.avx512.vpdpwssd.256" => "__builtin_ia32_vpdpwssd_v8si" ,
10981123 "llvm.x86.avx512.vpdpwssd.128" => "__builtin_ia32_vpdpwssd_v4si" ,
@@ -1209,6 +1234,55 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
12091234 "llvm.x86.avx512fp16.vfmadd.f16" => "__builtin_ia32_vfmaddsh3_mask" ,
12101235 "llvm.x86.avx512fp16.vfmaddsub.ph.128" => "__builtin_ia32_vfmaddsubph128_mask" ,
12111236 "llvm.x86.avx512fp16.vfmaddsub.ph.256" => "__builtin_ia32_vfmaddsubph256_mask" ,
1237+ "llvm.x86.avx512fp16.vfmaddsub.ph.512" => "__builtin_ia32_vfmaddsubph512_mask" ,
1238+ "llvm.x86.avx512fp16.sqrt.ph.512" => "__builtin_ia32_sqrtph512_mask_round" ,
1239+ "llvm.x86.avx512fp16.mask.sqrt.sh" => "__builtin_ia32_sqrtsh_mask_round" ,
1240+ "llvm.x86.avx512fp16.max.ph.128" => "__builtin_ia32_maxph128_mask" ,
1241+ "llvm.x86.avx512fp16.max.ph.256" => "__builtin_ia32_maxph256_mask" ,
1242+ "llvm.x86.avx512fp16.max.ph.512" => "__builtin_ia32_maxph512_mask_round" ,
1243+ "llvm.x86.avx512fp16.min.ph.128" => "__builtin_ia32_minph128_mask" ,
1244+ "llvm.x86.avx512fp16.min.ph.256" => "__builtin_ia32_minph256_mask" ,
1245+ "llvm.x86.avx512fp16.min.ph.512" => "__builtin_ia32_minph512_mask_round" ,
1246+ "llvm.x86.avx512fp16.mask.getexp.sh" => "__builtin_ia32_getexpsh_mask_round" ,
1247+ "llvm.x86.avx512fp16.mask.rndscale.ph.128" => "__builtin_ia32_rndscaleph128_mask" ,
1248+ "llvm.x86.avx512fp16.mask.rndscale.ph.256" => "__builtin_ia32_rndscaleph256_mask" ,
1249+ "llvm.x86.avx512fp16.mask.rndscale.ph.512" => "__builtin_ia32_rndscaleph512_mask_round" ,
1250+ "llvm.x86.avx512fp16.mask.scalef.ph.512" => "__builtin_ia32_scalefph512_mask_round" ,
1251+ "llvm.x86.avx512fp16.mask.reduce.ph.512" => "__builtin_ia32_reduceph512_mask_round" ,
1252+ "llvm.x86.avx512fp16.mask.reduce.sh" => "__builtin_ia32_reducesh_mask_round" ,
1253+ "llvm.x86.avx512.sitofp.round.v8f16.v8i16" => "__builtin_ia32_vcvtw2ph128_mask" ,
1254+ "llvm.x86.avx512.sitofp.round.v16f16.v16i16" => "__builtin_ia32_vcvtw2ph256_mask" ,
1255+ "llvm.x86.avx512.sitofp.round.v32f16.v32i16" => "__builtin_ia32_vcvtw2ph512_mask_round" ,
1256+ "llvm.x86.avx512.uitofp.round.v8f16.v8u16" => "__builtin_ia32_vcvtuw2ph128_mask" ,
1257+ "llvm.x86.avx512.uitofp.round.v16f16.v16u16" => "__builtin_ia32_vcvtuw2ph256_mask" ,
1258+ "llvm.x86.avx512.uitofp.round.v32f16.v32u16" => "__builtin_ia32_vcvtuw2ph512_mask_round" ,
1259+ "llvm.x86.avx512.sitofp.round.v8f16.v8i32" => "__builtin_ia32_vcvtdq2ph256_mask" ,
1260+ "llvm.x86.avx512.sitofp.round.v16f16.v16i32" => "__builtin_ia32_vcvtdq2ph512_mask_round" ,
1261+ "llvm.x86.avx512fp16.vcvtsi2sh" => "__builtin_ia32_vcvtsi2sh32_round" ,
1262+ "llvm.x86.avx512.uitofp.round.v8f16.v8u32" => "__builtin_ia32_vcvtudq2ph256_mask" ,
1263+ "llvm.x86.avx512.uitofp.round.v16f16.v16u32" => "__builtin_ia32_vcvtudq2ph512_mask_round" ,
1264+ "llvm.x86.avx512fp16.vcvtusi2sh" => "__builtin_ia32_vcvtusi2sh32_round" ,
1265+ "llvm.x86.avx512.sitofp.round.v8f16.v8i64" => "__builtin_ia32_vcvtqq2ph512_mask_round" ,
1266+ "llvm.x86.avx512.uitofp.round.v8f16.v8u64" => "__builtin_ia32_vcvtuqq2ph512_mask_round" ,
1267+ "llvm.x86.avx512fp16.mask.vcvtps2phx.512" => "__builtin_ia32_vcvtps2phx512_mask_round" ,
1268+ "llvm.x86.avx512fp16.mask.vcvtpd2ph.512" => "__builtin_ia32_vcvtpd2ph512_mask_round" ,
1269+ "llvm.x86.avx512fp16.mask.vcvtph2uw.512" => "__builtin_ia32_vcvtph2uw512_mask_round" ,
1270+ "llvm.x86.avx512fp16.mask.vcvttph2w.512" => "__builtin_ia32_vcvttph2w512_mask_round" ,
1271+ "llvm.x86.avx512fp16.mask.vcvttph2uw.512" => "__builtin_ia32_vcvttph2uw512_mask_round" ,
1272+ "llvm.x86.avx512fp16.mask.vcvtph2dq.512" => "__builtin_ia32_vcvtph2dq512_mask_round" ,
1273+ "llvm.x86.avx512fp16.vcvtsh2si32" => "__builtin_ia32_vcvtsh2si32_round" ,
1274+ "llvm.x86.avx512fp16.mask.vcvtph2udq.512" => "__builtin_ia32_vcvtph2udq512_mask_round" ,
1275+ "llvm.x86.avx512fp16.vcvtsh2usi32" => "__builtin_ia32_vcvtsh2usi32_round" ,
1276+ "llvm.x86.avx512fp16.mask.vcvttph2dq.512" => "__builtin_ia32_vcvttph2dq512_mask_round" ,
1277+ "llvm.x86.avx512fp16.vcvttsh2si32" => "__builtin_ia32_vcvttsh2si32_round" ,
1278+ "llvm.x86.avx512fp16.mask.vcvttph2udq.512" => "__builtin_ia32_vcvttph2udq512_mask_round" ,
1279+ "llvm.x86.avx512fp16.vcvttsh2usi32" => "__builtin_ia32_vcvttsh2usi32_round" ,
1280+ "llvm.x86.avx512fp16.mask.vcvtph2qq.512" => "__builtin_ia32_vcvtph2qq512_mask_round" ,
1281+ "llvm.x86.avx512fp16.mask.vcvtph2uqq.512" => "__builtin_ia32_vcvtph2uqq512_mask_round" ,
1282+ "llvm.x86.avx512fp16.mask.vcvttph2qq.512" => "__builtin_ia32_vcvttph2qq512_mask_round" ,
1283+ "llvm.x86.avx512fp16.mask.vcvttph2uqq.512" => "__builtin_ia32_vcvttph2uqq512_mask_round" ,
1284+ "llvm.x86.avx512fp16.mask.vcvtph2psx.512" => "__builtin_ia32_vcvtph2psx512_mask_round" ,
1285+ "llvm.x86.avx512fp16.mask.vcvtph2pd.512" => "__builtin_ia32_vcvtph2pd512_mask_round" ,
12121286
12131287 // TODO: support the tile builtins:
12141288 "llvm.x86.ldtilecfg" => "__builtin_trap" ,
0 commit comments