@@ -171,3 +171,173 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable
171171 %r = load double , double * %bc , align 32
172172 ret double %r
173173}
174+
; Scalar float load (align 4) from a pointer marked align 16, dereferenceable(16);
; the loaded value is inserted into element 0 of an undef <4 x float>.
; The CHECK lines expect the load and insertelement to appear in the output in
; the same form as in the input.
175+ define <4 x float > @load_f32_insert_v4f32 (float * align 16 dereferenceable (16 ) %p ) {
176+ ; CHECK-LABEL: @load_f32_insert_v4f32(
177+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
178+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
179+ ; CHECK-NEXT: ret <4 x float> [[R]]
180+ ;
181+ %s = load float , float * %p , align 4
182+ %r = insertelement <4 x float > undef , float %s , i32 0
183+ ret <4 x float > %r
184+ }
185+
; Same load+insert pattern, but the argument is a <4 x float>* (align 4,
; dereferenceable(16)) that is bitcast to float* before the scalar load.
; The CHECK lines expect the bitcast, scalar load and insertelement to appear
; in the output in the same form as in the input.
186+ define <4 x float > @casted_load_f32_insert_v4f32 (<4 x float >* align 4 dereferenceable (16 ) %p ) {
187+ ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
188+ ; CHECK-NEXT: [[B:%.*]] = bitcast <4 x float>* [[P:%.*]] to float*
189+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[B]], align 4
190+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
191+ ; CHECK-NEXT: ret <4 x float> [[R]]
192+ ;
193+ %b = bitcast <4 x float >* %p to float *
194+ %s = load float , float * %b , align 4
195+ %r = insertelement <4 x float > undef , float %s , i32 0
196+ ret <4 x float > %r
197+ }
198+
; Integer variant: scalar i32 load (align 4) from a pointer marked align 16,
; dereferenceable(16); the value is inserted into element 0 of an undef <4 x i32>.
; The CHECK lines expect the load and insertelement to appear in the output in
; the same form as in the input.
199+ define <4 x i32 > @load_i32_insert_v4i32 (i32* align 16 dereferenceable (16 ) %p ) {
200+ ; CHECK-LABEL: @load_i32_insert_v4i32(
201+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[P:%.*]], align 4
202+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
203+ ; CHECK-NEXT: ret <4 x i32> [[R]]
204+ ;
205+ %s = load i32 , i32* %p , align 4
206+ %r = insertelement <4 x i32 > undef , i32 %s , i32 0
207+ ret <4 x i32 > %r
208+ }
209+
; The argument is a <16 x i8>* (align 4, dereferenceable(16)) whose element type
; differs from the loaded type: it is bitcast to i32*, an i32 is loaded, and the
; value is inserted into element 0 of an undef <4 x i32>.
; The CHECK lines expect the bitcast, scalar load and insertelement to appear
; in the output in the same form as in the input.
210+ define <4 x i32 > @casted_load_i32_insert_v4i32 (<16 x i8 >* align 4 dereferenceable (16 ) %p ) {
211+ ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
212+ ; CHECK-NEXT: [[B:%.*]] = bitcast <16 x i8>* [[P:%.*]] to i32*
213+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 4
214+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
215+ ; CHECK-NEXT: ret <4 x i32> [[R]]
216+ ;
217+ %b = bitcast <16 x i8 >* %p to i32*
218+ %s = load i32 , i32* %b , align 4
219+ %r = insertelement <4 x i32 > undef , i32 %s , i32 0
220+ ret <4 x i32 > %r
221+ }
222+
; The scalar pointer is formed with an inbounds GEP using indices (0, 0), i.e.
; element 0 of the <4 x float> that %p (align 16, dereferenceable(16)) points to.
; The load uses align 16 and the insertelement index is an i64 0.
; The CHECK lines expect the GEP, load and insertelement to appear in the output
; in the same form as in the input.
223+ define <4 x float > @gep00_load_f32_insert_v4f32 (<4 x float >* align 16 dereferenceable (16 ) %p ) {
224+ ; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
225+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0
226+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[GEP]], align 16
227+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i64 0
228+ ; CHECK-NEXT: ret <4 x float> [[R]]
229+ ;
230+ %gep = getelementptr inbounds <4 x float >, <4 x float >* %p , i64 0 , i64 0
231+ %s = load float , float * %gep , align 16
232+ %r = insertelement <4 x float > undef , float %s , i64 0
233+ ret <4 x float > %r
234+ }
235+
; The scalar pointer is formed with an inbounds GEP using indices (0, 1), i.e.
; element 1 of the <8 x i16> at %p (a 2-byte offset). %p is marked align 16,
; dereferenceable(18); the i16 load uses align 2 and the value is inserted into
; element 0 of an undef <8 x i16>.
; The CHECK lines expect the GEP, load and insertelement to appear in the output
; in the same form as in the input.
; NOTE(review): 18 equals the 2-byte offset plus a 16-byte <8 x i16>; presumably
; chosen so a full vector starting at the GEP address stays dereferenceable - confirm.
236+ define <8 x i16 > @gep01_load_i16_insert_v8i16 (<8 x i16 >* align 16 dereferenceable (18 ) %p ) {
237+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
238+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
239+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2
240+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
241+ ; CHECK-NEXT: ret <8 x i16> [[R]]
242+ ;
243+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 0 , i64 1
244+ %s = load i16 , i16* %gep , align 2
245+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0
246+ ret <8 x i16 > %r
247+ }
248+
; Same body as @gep01_load_i16_insert_v8i16, but %p is marked
; dereferenceable(17) instead of dereferenceable(18).
; The CHECK lines expect the GEP, load and insertelement to appear in the output
; in the same form as in the input.
; NOTE(review): 17 is one byte short of the 2-byte offset plus a 16-byte vector;
; presumably a not-enough-dereferenceable-bytes case - confirm.
249+ define <8 x i16 > @gep01_load_i16_insert_v8i16_deref (<8 x i16 >* align 16 dereferenceable (17 ) %p ) {
250+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
251+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
252+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2
253+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
254+ ; CHECK-NEXT: ret <8 x i16> [[R]]
255+ ;
256+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 0 , i64 1
257+ %s = load i16 , i16* %gep , align 2
258+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0
259+ ret <8 x i16 > %r
260+ }
261+
; The scalar pointer is formed with an inbounds GEP using indices (1, 0), i.e.
; element 0 of the second <8 x i16> after %p (a 16-byte offset). %p is marked
; align 16, dereferenceable(32); the i16 load uses align 16 and the value is
; inserted into element 0 of an undef <8 x i16>.
; The CHECK lines expect the GEP, load and insertelement to appear in the output
; in the same form as in the input.
262+ define <8 x i16 > @gep10_load_i16_insert_v8i16 (<8 x i16 >* align 16 dereferenceable (32 ) %p ) {
263+ ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
264+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
265+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
266+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
267+ ; CHECK-NEXT: ret <8 x i16> [[R]]
268+ ;
269+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 1 , i64 0
270+ %s = load i16 , i16* %gep , align 16
271+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0
272+ ret <8 x i16 > %r
273+ }
274+
; Same body as @gep10_load_i16_insert_v8i16, but %p is marked
; dereferenceable(31) instead of dereferenceable(32).
; The CHECK lines expect the GEP, load and insertelement to appear in the output
; in the same form as in the input.
; NOTE(review): 31 is one byte short of the 16-byte offset plus a 16-byte vector;
; presumably a not-enough-dereferenceable-bytes case - confirm.
275+ define <8 x i16 > @gep10_load_i16_insert_v8i16_deref (<8 x i16 >* align 16 dereferenceable (31 ) %p ) {
276+ ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref(
277+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
278+ ; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
279+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
280+ ; CHECK-NEXT: ret <8 x i16> [[R]]
281+ ;
282+ %gep = getelementptr inbounds <8 x i16 >, <8 x i16 >* %p , i64 1 , i64 0
283+ %s = load i16 , i16* %gep , align 16
284+ %r = insertelement <8 x i16 > undef , i16 %s , i64 0
285+ ret <8 x i16 > %r
286+ }
287+
; Same signature and insert as @load_f32_insert_v4f32, but the scalar load is
; marked volatile.
; The CHECK lines expect the volatile scalar load and the insertelement to
; appear in the output in the same form as in the input.
288+ define <4 x float > @load_f32_insert_v4f32_volatile (float * align 16 dereferenceable (16 ) %p ) {
289+ ; CHECK-LABEL: @load_f32_insert_v4f32_volatile(
290+ ; CHECK-NEXT: [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4
291+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
292+ ; CHECK-NEXT: ret <4 x float> [[R]]
293+ ;
294+ %s = load volatile float , float * %p , align 4
295+ %r = insertelement <4 x float > undef , float %s , i32 0
296+ ret <4 x float > %r
297+ }
298+
; Same body as @load_f32_insert_v4f32, but the pointer argument is marked
; align 1 (still dereferenceable(16)) while the load itself states align 4.
; The CHECK lines expect the load and insertelement to appear in the output in
; the same form as in the input.
; NOTE(review): presumably exercises the pointer-alignment condition - confirm.
299+ define <4 x float > @load_f32_insert_v4f32_align (float * align 1 dereferenceable (16 ) %p ) {
300+ ; CHECK-LABEL: @load_f32_insert_v4f32_align(
301+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
302+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
303+ ; CHECK-NEXT: ret <4 x float> [[R]]
304+ ;
305+ %s = load float , float * %p , align 4
306+ %r = insertelement <4 x float > undef , float %s , i32 0
307+ ret <4 x float > %r
308+ }
309+
; Same body as @load_f32_insert_v4f32, but the pointer argument is marked
; align 4, dereferenceable(15): one byte fewer than the 16 bytes of a <4 x float>.
; The CHECK lines expect the load and insertelement to appear in the output in
; the same form as in the input.
; NOTE(review): presumably a not-enough-dereferenceable-bytes case - confirm.
310+ define <4 x float > @load_f32_insert_v4f32_deref (float * align 4 dereferenceable (15 ) %p ) {
311+ ; CHECK-LABEL: @load_f32_insert_v4f32_deref(
312+ ; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
313+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
314+ ; CHECK-NEXT: ret <4 x float> [[R]]
315+ ;
316+ %s = load float , float * %p , align 4
317+ %r = insertelement <4 x float > undef , float %s , i32 0
318+ ret <4 x float > %r
319+ }
320+
; Scalar i32 load (align 4) from a pointer marked align 16, dereferenceable(16);
; the value is inserted into element 0 of an undef <8 x i32>. The result vector
; is 32 bytes wide, twice the dereferenceable size stated on the pointer.
; The CHECK lines expect the load and insertelement to appear in the output in
; the same form as in the input.
321+ define <8 x i32 > @load_i32_insert_v8i32 (i32* align 16 dereferenceable (16 ) %p ) {
322+ ; CHECK-LABEL: @load_i32_insert_v8i32(
323+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[P:%.*]], align 4
324+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
325+ ; CHECK-NEXT: ret <8 x i32> [[R]]
326+ ;
327+ %s = load i32 , i32* %p , align 4
328+ %r = insertelement <8 x i32 > undef , i32 %s , i32 0
329+ ret <8 x i32 > %r
330+ }
331+
; The argument is a <4 x i32>* (align 4, dereferenceable(16)) that is bitcast to
; i32*; an i32 is loaded and inserted into element 0 of an undef <8 x i32>, so
; the result vector type is wider than the pointee vector type.
; The CHECK lines expect the bitcast, scalar load and insertelement to appear
; in the output in the same form as in the input.
332+ define <8 x i32 > @casted_load_i32_insert_v8i32 (<4 x i32 >* align 4 dereferenceable (16 ) %p ) {
333+ ; CHECK-LABEL: @casted_load_i32_insert_v8i32(
334+ ; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i32>* [[P:%.*]] to i32*
335+ ; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 4
336+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
337+ ; CHECK-NEXT: ret <8 x i32> [[R]]
338+ ;
339+ %b = bitcast <4 x i32 >* %p to i32*
340+ %s = load i32 , i32* %b , align 4
341+ %r = insertelement <8 x i32 > undef , i32 %s , i32 0
342+ ret <8 x i32 > %r
343+ }
0 commit comments