@@ -310,6 +310,40 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
310310 let val = CValue :: by_val_pair ( cb_out, c, layout) ;
311311 ret. write_cvalue ( fx, val) ;
312312 }
313+ "llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => {
314+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
315+
316+ // FIXME use vector instructions when possible
317+ simd_pair_for_each_lane (
318+ fx,
319+ a,
320+ b,
321+ ret,
322+ & |fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
323+ // (a + b + 1) >> 1
324+ let lane_ty = fx. bcx . func . dfg . value_type ( a_lane) ;
325+ let a_lane = fx. bcx . ins ( ) . uextend ( lane_ty. double_width ( ) . unwrap ( ) , a_lane) ;
326+ let b_lane = fx. bcx . ins ( ) . uextend ( lane_ty. double_width ( ) . unwrap ( ) , b_lane) ;
327+ let sum = fx. bcx . ins ( ) . iadd ( a_lane, b_lane) ;
328+ let num_plus_one = fx. bcx . ins ( ) . iadd_imm ( sum, 1 ) ;
329+ let res = fx. bcx . ins ( ) . ushr_imm ( num_plus_one, 1 ) ;
330+ fx. bcx . ins ( ) . ireduce ( lane_ty, res)
331+ } ,
332+ ) ;
333+ }
334+ "llvm.x86.sse2.psra.w" => {
335+ intrinsic_args ! ( fx, args => ( a, count) ; intrinsic) ;
336+
337+ let count_lane = count. force_stack ( fx) . 0 . load ( fx, types:: I64 , MemFlags :: trusted ( ) ) ;
338+ let lane_ty = fx. clif_type ( a. layout ( ) . ty . simd_size_and_type ( fx. tcx ) . 1 ) . unwrap ( ) ;
339+ let max_count = fx. bcx . ins ( ) . iconst ( types:: I64 , i64:: from ( lane_ty. bits ( ) - 1 ) ) ;
340+ let saturated_count = fx. bcx . ins ( ) . umin ( count_lane, max_count) ;
341+
342+ // FIXME use vector instructions when possible
343+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, a_lane| {
344+ fx. bcx . ins ( ) . sshr ( a_lane, saturated_count)
345+ } ) ;
346+ }
313347 _ => {
314348 fx. tcx
315349 . sess
0 commit comments