@@ -344,6 +344,109 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
344344 fx. bcx . ins ( ) . sshr ( a_lane, saturated_count)
345345 } ) ;
346346 }
347+ "llvm.x86.sse2.psad.bw" => {
348+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
349+
350+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
351+ let layout = a. layout ( ) ;
352+
353+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
354+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
355+ assert_eq ! ( lane_ty, fx. tcx. types. u8 ) ;
356+ assert_eq ! ( ret_lane_ty, fx. tcx. types. u64 ) ;
357+ assert_eq ! ( lane_count, ret_lane_count * 8 ) ;
358+
359+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . u64 ) ;
360+ for out_lane_idx in 0 ..lane_count / 8 {
361+ let mut lane_diff_acc = fx. bcx . ins ( ) . iconst ( types:: I64 , 0 ) ;
362+
363+ for lane_idx in out_lane_idx * 8 ..out_lane_idx * 8 + 1 {
364+ let a_lane = a. value_lane ( fx, lane_idx) . load_scalar ( fx) ;
365+ let b_lane = b. value_lane ( fx, lane_idx) . load_scalar ( fx) ;
366+
367+ let lane_diff = fx. bcx . ins ( ) . isub ( a_lane, b_lane) ;
368+ let abs_lane_diff = fx. bcx . ins ( ) . iabs ( lane_diff) ;
369+ let abs_lane_diff = fx. bcx . ins ( ) . uextend ( types:: I64 , abs_lane_diff) ;
370+ lane_diff_acc = fx. bcx . ins ( ) . iadd ( lane_diff_acc, abs_lane_diff) ;
371+ }
372+
373+ let res_lane = CValue :: by_val ( lane_diff_acc, ret_lane_layout) ;
374+
375+ ret. place_lane ( fx, out_lane_idx) . write_cvalue ( fx, res_lane) ;
376+ }
377+ }
378+ "llvm.x86.ssse3.pmadd.ub.sw.128" => {
379+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
380+
381+ let ( lane_count, lane_ty) = a. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
382+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
383+ assert_eq ! ( lane_ty, fx. tcx. types. u8 ) ;
384+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i16 ) ;
385+ assert_eq ! ( lane_count, ret_lane_count * 2 ) ;
386+
387+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i16 ) ;
388+ for out_lane_idx in 0 ..lane_count / 2 {
389+ let a_lane0 = a. value_lane ( fx, out_lane_idx * 2 ) . load_scalar ( fx) ;
390+ let a_lane0 = fx. bcx . ins ( ) . uextend ( types:: I16 , a_lane0) ;
391+ let b_lane0 = b. value_lane ( fx, out_lane_idx * 2 ) . load_scalar ( fx) ;
392+ let b_lane0 = fx. bcx . ins ( ) . sextend ( types:: I16 , b_lane0) ;
393+
394+ let a_lane1 = a. value_lane ( fx, out_lane_idx * 2 + 1 ) . load_scalar ( fx) ;
395+ let a_lane1 = fx. bcx . ins ( ) . uextend ( types:: I16 , a_lane1) ;
396+ let b_lane1 = b. value_lane ( fx, out_lane_idx * 2 + 1 ) . load_scalar ( fx) ;
397+ let b_lane1 = fx. bcx . ins ( ) . sextend ( types:: I16 , b_lane1) ;
398+
399+ let mul0: Value = fx. bcx . ins ( ) . imul ( a_lane0, b_lane0) ;
400+ let mul1 = fx. bcx . ins ( ) . imul ( a_lane1, b_lane1) ;
401+
402+ let ( val, has_overflow) = fx. bcx . ins ( ) . sadd_overflow ( mul0, mul1) ;
403+
404+ let rhs_ge_zero = fx. bcx . ins ( ) . icmp_imm ( IntCC :: SignedGreaterThanOrEqual , mul1, 0 ) ;
405+
406+ let min = fx. bcx . ins ( ) . iconst ( types:: I16 , i64:: from ( i16:: MIN as u16 ) ) ;
407+ let max = fx. bcx . ins ( ) . iconst ( types:: I16 , i64:: from ( i16:: MAX as u16 ) ) ;
408+
409+ let sat_val = fx. bcx . ins ( ) . select ( rhs_ge_zero, max, min) ;
410+ let res_lane = fx. bcx . ins ( ) . select ( has_overflow, sat_val, val) ;
411+
412+ let res_lane = CValue :: by_val ( res_lane, ret_lane_layout) ;
413+
414+ ret. place_lane ( fx, out_lane_idx) . write_cvalue ( fx, res_lane) ;
415+ }
416+ }
417+ "llvm.x86.sse2.pmadd.wd" => {
418+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
419+
420+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
421+ let layout = a. layout ( ) ;
422+
423+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
424+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
425+ assert_eq ! ( lane_ty, fx. tcx. types. i16 ) ;
426+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i32 ) ;
427+ assert_eq ! ( lane_count, ret_lane_count * 2 ) ;
428+
429+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i32 ) ;
430+ for out_lane_idx in 0 ..lane_count / 2 {
431+ let a_lane0 = a. value_lane ( fx, out_lane_idx * 2 ) . load_scalar ( fx) ;
432+ let a_lane0 = fx. bcx . ins ( ) . uextend ( types:: I32 , a_lane0) ;
433+ let b_lane0 = b. value_lane ( fx, out_lane_idx * 2 ) . load_scalar ( fx) ;
434+ let b_lane0 = fx. bcx . ins ( ) . sextend ( types:: I32 , b_lane0) ;
435+
436+ let a_lane1 = a. value_lane ( fx, out_lane_idx * 2 + 1 ) . load_scalar ( fx) ;
437+ let a_lane1 = fx. bcx . ins ( ) . uextend ( types:: I32 , a_lane1) ;
438+ let b_lane1 = b. value_lane ( fx, out_lane_idx * 2 + 1 ) . load_scalar ( fx) ;
439+ let b_lane1 = fx. bcx . ins ( ) . sextend ( types:: I32 , b_lane1) ;
440+
441+ let mul0: Value = fx. bcx . ins ( ) . imul ( a_lane0, b_lane0) ;
442+ let mul1 = fx. bcx . ins ( ) . imul ( a_lane1, b_lane1) ;
443+
444+ let res_lane = fx. bcx . ins ( ) . iadd ( mul0, mul1) ;
445+ let res_lane = CValue :: by_val ( res_lane, ret_lane_layout) ;
446+
447+ ret. place_lane ( fx, out_lane_idx) . write_cvalue ( fx, res_lane) ;
448+ }
449+ }
347450 _ => {
348451 fx. tcx
349452 . sess
0 commit comments