@@ -74,6 +74,93 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
7474 ret. write_cvalue ( fx, val) ;
7575 }
7676
77+ "llvm.x86.avx2.gather.d.ps"
78+ | "llvm.x86.avx2.gather.d.pd"
79+ | "llvm.x86.avx2.gather.d.ps.256"
80+ | "llvm.x86.avx2.gather.d.pd.256"
81+ | "llvm.x86.avx2.gather.q.ps"
82+ | "llvm.x86.avx2.gather.q.pd"
83+ | "llvm.x86.avx2.gather.q.ps.256"
84+ | "llvm.x86.avx2.gather.q.pd.256" => {
85+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818
86+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd&ig_expand=3819
87+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd&ig_expand=3821
88+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd&ig_expand=3822
89+ // ...
90+
91+ intrinsic_args ! ( fx, args => ( src, ptr, index, mask, scale) ; intrinsic) ;
92+
93+ let ( src_lane_count, src_lane_ty) = src. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
94+ let ( index_lane_count, index_lane_ty) = index. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
95+ let ( mask_lane_count, mask_lane_ty) = mask. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
96+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
97+ assert ! ( src_lane_ty. is_floating_point( ) ) ;
98+ assert ! ( index_lane_ty. is_integral( ) ) ;
99+ assert ! ( mask_lane_ty. is_floating_point( ) ) ;
100+ assert ! ( ret_lane_ty. is_floating_point( ) ) ;
101+ assert_eq ! ( src_lane_count, mask_lane_count) ;
102+ assert_eq ! ( src_lane_count, ret_lane_count) ;
103+
104+ let lane_clif_ty = fx. clif_type ( ret_lane_ty) . unwrap ( ) ;
105+ let index_lane_clif_ty = fx. clif_type ( index_lane_ty) . unwrap ( ) ;
106+ let mask_lane_clif_ty = fx. clif_type ( mask_lane_ty) . unwrap ( ) ;
107+ let ret_lane_layout = fx. layout_of ( ret_lane_ty) ;
108+
109+ let ptr = ptr. load_scalar ( fx) ;
110+ let scale = scale. load_scalar ( fx) ;
111+ let scale = fx. bcx . ins ( ) . uextend ( types:: I64 , scale) ;
112+ for lane_idx in 0 ..std:: cmp:: min ( src_lane_count, index_lane_count) {
113+ let src_lane = src. value_lane ( fx, lane_idx) . load_scalar ( fx) ;
114+ let index_lane = index. value_lane ( fx, lane_idx) . load_scalar ( fx) ;
115+ let mask_lane = mask. value_lane ( fx, lane_idx) . load_scalar ( fx) ;
116+ let mask_lane =
117+ fx. bcx . ins ( ) . bitcast ( mask_lane_clif_ty. as_int ( ) , MemFlags :: new ( ) , mask_lane) ;
118+
119+ let if_enabled = fx. bcx . create_block ( ) ;
120+ let if_disabled = fx. bcx . create_block ( ) ;
121+ let next = fx. bcx . create_block ( ) ;
122+ let res_lane = fx. bcx . append_block_param ( next, lane_clif_ty) ;
123+
124+ let mask_lane = match mask_lane_clif_ty {
125+ types:: F32 => fx. bcx . ins ( ) . band_imm ( mask_lane, 0x8000_0000u64 as i64 ) ,
126+ types:: F64 => fx. bcx . ins ( ) . band_imm ( mask_lane, 0x8000_0000_0000_0000u64 as i64 ) ,
127+ _ => unreachable ! ( ) ,
128+ } ;
129+ fx. bcx . ins ( ) . brif ( mask_lane, if_enabled, & [ ] , if_disabled, & [ ] ) ;
130+ fx. bcx . seal_block ( if_enabled) ;
131+ fx. bcx . seal_block ( if_disabled) ;
132+
133+ fx. bcx . switch_to_block ( if_enabled) ;
134+ let index_lane = if index_lane_clif_ty != types:: I64 {
135+ fx. bcx . ins ( ) . sextend ( types:: I64 , index_lane)
136+ } else {
137+ index_lane
138+ } ;
139+ let offset = fx. bcx . ins ( ) . imul ( index_lane, scale) ;
140+ let lane_ptr = fx. bcx . ins ( ) . iadd ( ptr, offset) ;
141+ let res = fx. bcx . ins ( ) . load ( lane_clif_ty, MemFlags :: trusted ( ) , lane_ptr, 0 ) ;
142+ fx. bcx . ins ( ) . jump ( next, & [ res] ) ;
143+
144+ fx. bcx . switch_to_block ( if_disabled) ;
145+ fx. bcx . ins ( ) . jump ( next, & [ src_lane] ) ;
146+
147+ fx. bcx . seal_block ( next) ;
148+ fx. bcx . switch_to_block ( next) ;
149+
150+ fx. bcx . ins ( ) . nop ( ) ;
151+
152+ ret. place_lane ( fx, lane_idx)
153+ . write_cvalue ( fx, CValue :: by_val ( res_lane, ret_lane_layout) ) ;
154+ }
155+
156+ for lane_idx in std:: cmp:: min ( src_lane_count, index_lane_count) ..ret_lane_count {
157+ let zero_lane = fx. bcx . ins ( ) . iconst ( mask_lane_clif_ty. as_int ( ) , 0 ) ;
158+ let zero_lane = fx. bcx . ins ( ) . bitcast ( mask_lane_clif_ty, MemFlags :: new ( ) , zero_lane) ;
159+ ret. place_lane ( fx, lane_idx)
160+ . write_cvalue ( fx, CValue :: by_val ( zero_lane, ret_lane_layout) ) ;
161+ }
162+ }
163+
77164 "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
78165 let ( x, y, kind) = match args {
79166 [ x, y, kind] => ( x, y, kind) ,
0 commit comments