xref: /llvm-project/llvm/test/CodeGen/X86/avx-insertelt.ll (revision d3b0fba6084d800ff37432705573c1b0318c7f06)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=ALL --check-prefix=AVX
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4
5; 0'th element of low subvector insertion into an AVX register.
6
; Inserts float %s into element 0 of <8 x float> %x and returns the result.
; Both RUN configurations share the ALL prefix here: the expected code is a
; single vblendps that takes lane 0 from ymm1 and lanes 1-7 from ymm0.
7define <8 x float> @insert_f32_firstelt_of_low_subvector(<8 x float> %x, float %s) {
8; ALL-LABEL: insert_f32_firstelt_of_low_subvector:
9; ALL:       # %bb.0:
10; ALL-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
11; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
12; ALL-NEXT:    retq
13  %i0 = insertelement <8 x float> %x, float %s, i32 0
14  ret <8 x float> %i0
15}
16
; Inserts double %s into element 0 of <4 x double> %x and returns the result.
; Both RUN configurations share the ALL prefix: the expected code is a single
; vblendps taking 32-bit lanes 0-1 (one double) from ymm1 and the rest from ymm0.
17define <4 x double> @insert_f64_firstelt_of_low_subvector(<4 x double> %x, double %s) {
18; ALL-LABEL: insert_f64_firstelt_of_low_subvector:
19; ALL:       # %bb.0:
20; ALL-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
21; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
22; ALL-NEXT:    retq
23  %i0 = insertelement <4 x double> %x, double %s, i32 0
24  ret <4 x double> %i0
25}
26
; Inserts i8 %s into element 0 of <32 x i8> %x and returns the result.
; Both runs expect a vpinsrb into the low xmm followed by a blend that puts the
; updated low half back over ymm0; the +avx run expects vblendps and the +avx2
; run expects vpblendd for that blend.
27define <32 x i8> @insert_i8_firstelt_of_low_subvector(<32 x i8> %x, i8 %s) {
28; AVX-LABEL: insert_i8_firstelt_of_low_subvector:
29; AVX:       # %bb.0:
30; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
31; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
32; AVX-NEXT:    retq
33;
34; AVX2-LABEL: insert_i8_firstelt_of_low_subvector:
35; AVX2:       # %bb.0:
36; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
37; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
38; AVX2-NEXT:    retq
39  %i0 = insertelement <32 x i8> %x, i8 %s, i32 0
40  ret <32 x i8> %i0
41}
42
; Inserts i16 %s into element 0 of <16 x i16> %x and returns the result.
; Both runs expect a vpinsrw into the low xmm followed by a blend of the updated
; low half over ymm0 (vblendps for +avx, vpblendd for +avx2).
43define <16 x i16> @insert_i16_firstelt_of_low_subvector(<16 x i16> %x, i16 %s) {
44; AVX-LABEL: insert_i16_firstelt_of_low_subvector:
45; AVX:       # %bb.0:
46; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
47; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
48; AVX-NEXT:    retq
49;
50; AVX2-LABEL: insert_i16_firstelt_of_low_subvector:
51; AVX2:       # %bb.0:
52; AVX2-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
53; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
54; AVX2-NEXT:    retq
55  %i0 = insertelement <16 x i16> %x, i16 %s, i32 0
56  ret <16 x i16> %i0
57}
58
; Inserts i32 %s into element 0 of <8 x i32> %x and returns the result.
; The +avx run expects vpinsrd into the low xmm plus a vblendps of the low half;
; the +avx2 run expects vmovd of the scalar plus a vpblendd of lane 0 only.
59define <8 x i32> @insert_i32_firstelt_of_low_subvector(<8 x i32> %x, i32 %s) {
60; AVX-LABEL: insert_i32_firstelt_of_low_subvector:
61; AVX:       # %bb.0:
62; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm1
63; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
64; AVX-NEXT:    retq
65;
66; AVX2-LABEL: insert_i32_firstelt_of_low_subvector:
67; AVX2:       # %bb.0:
68; AVX2-NEXT:    vmovd %edi, %xmm1
69; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
70; AVX2-NEXT:    retq
71  %i0 = insertelement <8 x i32> %x, i32 %s, i32 0
72  ret <8 x i32> %i0
73}
74
; Inserts i64 %s into element 0 of <4 x i64> %x and returns the result.
; The +avx run expects vpinsrq into the low xmm plus a vblendps of the low half;
; the +avx2 run expects vmovq of the scalar plus a vpblendd of 32-bit lanes 0-1.
75define <4 x i64> @insert_i64_firstelt_of_low_subvector(<4 x i64> %x, i64 %s) {
76; AVX-LABEL: insert_i64_firstelt_of_low_subvector:
77; AVX:       # %bb.0:
78; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm1
79; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
80; AVX-NEXT:    retq
81;
82; AVX2-LABEL: insert_i64_firstelt_of_low_subvector:
83; AVX2:       # %bb.0:
84; AVX2-NEXT:    vmovq %rdi, %xmm1
85; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
86; AVX2-NEXT:    retq
87  %i0 = insertelement <4 x i64> %x, i64 %s, i32 0
88  ret <4 x i64> %i0
89}
90
91; 0'th element of high subvector insertion into an AVX register.
92
; Inserts float %s into element 4 of <8 x float> %x and returns the result.
; The +avx run expects an extract of the high 128 bits, an xmm vblendps, and a
; vinsertf128 back; the +avx2 run expects vbroadcastss plus a ymm vblendps of
; lane 4.
93define <8 x float> @insert_f32_firstelt_of_high_subvector(<8 x float> %x, float %s) {
94; AVX-LABEL: insert_f32_firstelt_of_high_subvector:
95; AVX:       # %bb.0:
96; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
97; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
98; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
99; AVX-NEXT:    retq
100;
101; AVX2-LABEL: insert_f32_firstelt_of_high_subvector:
102; AVX2:       # %bb.0:
103; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
104; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7]
105; AVX2-NEXT:    retq
106  %i0 = insertelement <8 x float> %x, float %s, i32 4
107  ret <8 x float> %i0
108}
109
; Inserts double %s into element 2 of <4 x double> %x and returns the result.
; The +avx run expects vextractf128, an xmm vblendps, and vinsertf128; the
; +avx2 run expects vbroadcastsd plus a ymm vblendps of 32-bit lanes 4-5.
110define <4 x double> @insert_f64_firstelt_of_high_subvector(<4 x double> %x, double %s) {
111; AVX-LABEL: insert_f64_firstelt_of_high_subvector:
112; AVX:       # %bb.0:
113; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
114; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
115; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
116; AVX-NEXT:    retq
117;
118; AVX2-LABEL: insert_f64_firstelt_of_high_subvector:
119; AVX2:       # %bb.0:
120; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
121; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
122; AVX2-NEXT:    retq
123  %i0 = insertelement <4 x double> %x, double %s, i32 2
124  ret <4 x double> %i0
125}
126
; Inserts i8 %s into element 16 of <32 x i8> %x and returns the result.
; Both runs expect extract-high-128 / vpinsrb $0 / insert-high-128; the +avx run
; uses vextractf128/vinsertf128 and the +avx2 run uses vextracti128/vinserti128.
127define <32 x i8> @insert_i8_firstelt_of_high_subvector(<32 x i8> %x, i8 %s) {
128; AVX-LABEL: insert_i8_firstelt_of_high_subvector:
129; AVX:       # %bb.0:
130; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
131; AVX-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
132; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
133; AVX-NEXT:    retq
134;
135; AVX2-LABEL: insert_i8_firstelt_of_high_subvector:
136; AVX2:       # %bb.0:
137; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
138; AVX2-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
139; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
140; AVX2-NEXT:    retq
141  %i0 = insertelement <32 x i8> %x, i8 %s, i32 16
142  ret <32 x i8> %i0
143}
144
; Inserts i16 %s into element 8 of <16 x i16> %x and returns the result.
; The +avx run expects vextractf128 / vpinsrw $0 / vinsertf128; the +avx2 run
; expects vmovd + vpbroadcastw, a vpblendw selecting words 0 and 8 from the
; broadcast, then a vpblendd that keeps only the high half of that blend.
145define <16 x i16> @insert_i16_firstelt_of_high_subvector(<16 x i16> %x, i16 %s) {
146; AVX-LABEL: insert_i16_firstelt_of_high_subvector:
147; AVX:       # %bb.0:
148; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
149; AVX-NEXT:    vpinsrw $0, %edi, %xmm1, %xmm1
150; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
151; AVX-NEXT:    retq
152;
153; AVX2-LABEL: insert_i16_firstelt_of_high_subvector:
154; AVX2:       # %bb.0:
155; AVX2-NEXT:    vmovd %edi, %xmm1
156; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
157; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
158; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
159; AVX2-NEXT:    retq
160  %i0 = insertelement <16 x i16> %x, i16 %s, i32 8
161  ret <16 x i16> %i0
162}
163
; Inserts i32 %s into element 4 of <8 x i32> %x and returns the result.
; The +avx run expects vextractf128 / vpinsrd $0 / vinsertf128; the +avx2 run
; expects vmovd + vpbroadcastd followed by a vpblendd of lane 4.
164define <8 x i32> @insert_i32_firstelt_of_high_subvector(<8 x i32> %x, i32 %s) {
165; AVX-LABEL: insert_i32_firstelt_of_high_subvector:
166; AVX:       # %bb.0:
167; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
168; AVX-NEXT:    vpinsrd $0, %edi, %xmm1, %xmm1
169; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
170; AVX-NEXT:    retq
171;
172; AVX2-LABEL: insert_i32_firstelt_of_high_subvector:
173; AVX2:       # %bb.0:
174; AVX2-NEXT:    vmovd %edi, %xmm1
175; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
176; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7]
177; AVX2-NEXT:    retq
178  %i0 = insertelement <8 x i32> %x, i32 %s, i32 4
179  ret <8 x i32> %i0
180}
181
; Inserts i64 %s into element 2 of <4 x i64> %x and returns the result.
; The +avx run expects vextractf128 / vpinsrq $0 / vinsertf128; the +avx2 run
; expects vmovq + vpbroadcastq followed by a vpblendd of 32-bit lanes 4-5.
182define <4 x i64> @insert_i64_firstelt_of_high_subvector(<4 x i64> %x, i64 %s) {
183; AVX-LABEL: insert_i64_firstelt_of_high_subvector:
184; AVX:       # %bb.0:
185; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
186; AVX-NEXT:    vpinsrq $0, %rdi, %xmm1, %xmm1
187; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
188; AVX-NEXT:    retq
189;
190; AVX2-LABEL: insert_i64_firstelt_of_high_subvector:
191; AVX2:       # %bb.0:
192; AVX2-NEXT:    vmovq %rdi, %xmm1
193; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
194; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
195; AVX2-NEXT:    retq
196  %i0 = insertelement <4 x i64> %x, i64 %s, i32 2
197  ret <4 x i64> %i0
198}
199
200; element insertion into 0'th element of both subvectors
201
; Inserts the same float %s into elements 0 and 4 of <8 x float> %x (two chained
; insertelements) and returns the result.
; The +avx run expects a per-half xmm vblendps joined by vinsertf128; the +avx2
; run expects vbroadcastss plus one ymm vblendps of lanes 0 and 4.
202define <8 x float> @insert_f32_firstelts(<8 x float> %x, float %s) {
203; AVX-LABEL: insert_f32_firstelts:
204; AVX:       # %bb.0:
205; AVX-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
206; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
207; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
208; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
209; AVX-NEXT:    retq
210;
211; AVX2-LABEL: insert_f32_firstelts:
212; AVX2:       # %bb.0:
213; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
214; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
215; AVX2-NEXT:    retq
216  %i0 = insertelement <8 x float> %x, float %s, i32 0
217  %i1 = insertelement <8 x float> %i0, float %s, i32 4
218  ret <8 x float> %i1
219}
220
; Inserts the same double %s into elements 0 and 2 of <4 x double> %x (two
; chained insertelements) and returns the result.
; The +avx run expects a per-half xmm vblendps joined by vinsertf128; the +avx2
; run expects vbroadcastsd followed by a vunpckhpd that interleaves the
; broadcast with the odd elements of ymm0.
221define <4 x double> @insert_f64_firstelts(<4 x double> %x, double %s) {
222; AVX-LABEL: insert_f64_firstelts:
223; AVX:       # %bb.0:
224; AVX-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0,1],xmm0[2,3]
225; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
226; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
227; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
228; AVX-NEXT:    retq
229;
230; AVX2-LABEL: insert_f64_firstelts:
231; AVX2:       # %bb.0:
232; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
233; AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
234; AVX2-NEXT:    retq
235  %i0 = insertelement <4 x double> %x, double %s, i32 0
236  %i1 = insertelement <4 x double> %i0, double %s, i32 2
237  ret <4 x double> %i1
238}
239
; Inserts the same i8 %s into elements 0 and 16 of <32 x i8> %x (two chained
; insertelements) and returns the result.
; Both runs expect a vpinsrb $0 on each 128-bit half, with the halves split and
; rejoined by vextractf128/vinsertf128 (+avx) or vextracti128/vinserti128 (+avx2).
240define <32 x i8> @insert_i8_firstelts(<32 x i8> %x, i8 %s) {
241; AVX-LABEL: insert_i8_firstelts:
242; AVX:       # %bb.0:
243; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
244; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
245; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
246; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
247; AVX-NEXT:    retq
248;
249; AVX2-LABEL: insert_i8_firstelts:
250; AVX2:       # %bb.0:
251; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
252; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
253; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
254; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
255; AVX2-NEXT:    retq
256  %i0 = insertelement <32 x i8> %x, i8 %s, i32 0
257  %i1 = insertelement <32 x i8> %i0, i8 %s, i32 16
258  ret <32 x i8> %i1
259}
260
; Inserts the same i16 %s into elements 0 and 8 of <16 x i16> %x (two chained
; insertelements) and returns the result.
; The +avx run expects a vpinsrw $0 on each 128-bit half joined by vinsertf128.
; The +avx2 run expects the low-half sequence (vpinsrw + vpblendd) followed by
; the high-half sequence (vmovd + vpbroadcastw + vpblendw + vpblendd).
261define <16 x i16> @insert_i16_firstelts(<16 x i16> %x, i16 %s) {
262; AVX-LABEL: insert_i16_firstelts:
263; AVX:       # %bb.0:
264; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
265; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
266; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm0
267; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
268; AVX-NEXT:    retq
269;
270; AVX2-LABEL: insert_i16_firstelts:
271; AVX2:       # %bb.0:
272; AVX2-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
273; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
274; AVX2-NEXT:    vmovd %edi, %xmm1
275; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
276; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
277; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
278; AVX2-NEXT:    retq
279  %i0 = insertelement <16 x i16> %x, i16 %s, i32 0
280  %i1 = insertelement <16 x i16> %i0, i16 %s, i32 8
281  ret <16 x i16> %i1
282}
283
; Inserts the same i32 %s into elements 0 and 4 of <8 x i32> %x (two chained
; insertelements) and returns the result.
; The +avx run expects a vpinsrd $0 on each 128-bit half joined by vinsertf128;
; the +avx2 run expects vmovd + vpbroadcastd plus one vpblendd of lanes 0 and 4.
284define <8 x i32> @insert_i32_firstelts(<8 x i32> %x, i32 %s) {
285; AVX-LABEL: insert_i32_firstelts:
286; AVX:       # %bb.0:
287; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm1
288; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
289; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm0
290; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
291; AVX-NEXT:    retq
292;
293; AVX2-LABEL: insert_i32_firstelts:
294; AVX2:       # %bb.0:
295; AVX2-NEXT:    vmovd %edi, %xmm1
296; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
297; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
298; AVX2-NEXT:    retq
299  %i0 = insertelement <8 x i32> %x, i32 %s, i32 0
300  %i1 = insertelement <8 x i32> %i0, i32 %s, i32 4
301  ret <8 x i32> %i1
302}
303
; Inserts the same i64 %s into elements 0 and 2 of <4 x i64> %x (two chained
; insertelements) and returns the result.
; The +avx run expects a vpinsrq $0 on each 128-bit half joined by vinsertf128;
; the +avx2 run expects vmovq + vpbroadcastq followed by a vpunpckhqdq that
; interleaves the broadcast with the odd elements of ymm0.
304define <4 x i64> @insert_i64_firstelts(<4 x i64> %x, i64 %s) {
305; AVX-LABEL: insert_i64_firstelts:
306; AVX:       # %bb.0:
307; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm1
308; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
309; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
310; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
311; AVX-NEXT:    retq
312;
313; AVX2-LABEL: insert_i64_firstelts:
314; AVX2:       # %bb.0:
315; AVX2-NEXT:    vmovq %rdi, %xmm1
316; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
317; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
318; AVX2-NEXT:    retq
319  %i0 = insertelement <4 x i64> %x, i64 %s, i32 0
320  %i1 = insertelement <4 x i64> %i0, i64 %s, i32 2
321  ret <4 x i64> %i1
322}
323
324; element insertion into two elements of high subvector
325
; Inserts the same float %s into elements 4 and 5 of <8 x float> %x (two chained
; insertelements) and returns the result.
; The +avx run expects vextractf128, an xmm vshufps that places the scalar in
; the two low lanes, and vinsertf128; the +avx2 run expects vbroadcastss plus a
; ymm vblendps of lanes 4-5.
326define <8 x float> @insert_f32_two_elts_of_high_subvector(<8 x float> %x, float %s) {
327; AVX-LABEL: insert_f32_two_elts_of_high_subvector:
328; AVX:       # %bb.0:
329; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
330; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,3]
331; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
332; AVX-NEXT:    retq
333;
334; AVX2-LABEL: insert_f32_two_elts_of_high_subvector:
335; AVX2:       # %bb.0:
336; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
337; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
338; AVX2-NEXT:    retq
339  %i0 = insertelement <8 x float> %x, float %s, i32 4
340  %i1 = insertelement <8 x float> %i0, float %s, i32 5
341  ret <8 x float> %i1
342}
343
; Inserts the same double %s into elements 2 and 3 of <4 x double> %x (two
; chained insertelements), i.e. overwrites the whole high half, and returns the
; result.
; The +avx run expects vmovddup of the scalar plus vinsertf128; the +avx2 run
; expects vbroadcastsd plus a vblendps of the entire high half.
344define <4 x double> @insert_f64_two_elts_of_high_subvector(<4 x double> %x, double %s) {
345; AVX-LABEL: insert_f64_two_elts_of_high_subvector:
346; AVX:       # %bb.0:
347; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
348; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
349; AVX-NEXT:    retq
350;
351; AVX2-LABEL: insert_f64_two_elts_of_high_subvector:
352; AVX2:       # %bb.0:
353; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
354; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
355; AVX2-NEXT:    retq
356  %i0 = insertelement <4 x double> %x, double %s, i32 2
357  %i1 = insertelement <4 x double> %i0, double %s, i32 3
358  ret <4 x double> %i1
359}
360
; Inserts the same i8 %s into elements 16 and 17 of <32 x i8> %x (two chained
; insertelements) and returns the result.
; Both runs expect extract-high-128, vpinsrb $0 and vpinsrb $1, then
; insert-high-128 (vextractf128/vinsertf128 for +avx, vextracti128/vinserti128
; for +avx2).
361define <32 x i8> @insert_i8_two_elts_of_high_subvector(<32 x i8> %x, i8 %s) {
362; AVX-LABEL: insert_i8_two_elts_of_high_subvector:
363; AVX:       # %bb.0:
364; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
365; AVX-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
366; AVX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
367; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
368; AVX-NEXT:    retq
369;
370; AVX2-LABEL: insert_i8_two_elts_of_high_subvector:
371; AVX2:       # %bb.0:
372; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
373; AVX2-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
374; AVX2-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
375; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
376; AVX2-NEXT:    retq
377  %i0 = insertelement <32 x i8> %x, i8 %s, i32 16
378  %i1 = insertelement <32 x i8> %i0, i8 %s, i32 17
379  ret <32 x i8> %i1
380}
381
; Inserts the same i16 %s into elements 8 and 9 of <16 x i16> %x (two chained
; insertelements) and returns the result.
; The +avx run expects vextractf128, vpinsrw $0 and $1, then vinsertf128; the
; +avx2 run expects vmovd + vpbroadcastw plus a vpblendd of 32-bit lane 4 (the
; pair of adjacent words).
382define <16 x i16> @insert_i16_two_elts_of_high_subvector(<16 x i16> %x, i16 %s) {
383; AVX-LABEL: insert_i16_two_elts_of_high_subvector:
384; AVX:       # %bb.0:
385; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
386; AVX-NEXT:    vpinsrw $0, %edi, %xmm1, %xmm1
387; AVX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
388; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
389; AVX-NEXT:    retq
390;
391; AVX2-LABEL: insert_i16_two_elts_of_high_subvector:
392; AVX2:       # %bb.0:
393; AVX2-NEXT:    vmovd %edi, %xmm1
394; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
395; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7]
396; AVX2-NEXT:    retq
397  %i0 = insertelement <16 x i16> %x, i16 %s, i32 8
398  %i1 = insertelement <16 x i16> %i0, i16 %s, i32 9
399  ret <16 x i16> %i1
400}
401
; Inserts the same i32 %s into elements 4 and 5 of <8 x i32> %x (two chained
; insertelements) and returns the result.
; The +avx run expects vextractf128, vpinsrd $0 and $1, then vinsertf128; the
; +avx2 run expects vmovd + vpbroadcastd plus a vpblendd of lanes 4-5.
402define <8 x i32> @insert_i32_two_elts_of_high_subvector(<8 x i32> %x, i32 %s) {
403; AVX-LABEL: insert_i32_two_elts_of_high_subvector:
404; AVX:       # %bb.0:
405; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
406; AVX-NEXT:    vpinsrd $0, %edi, %xmm1, %xmm1
407; AVX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
408; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
409; AVX-NEXT:    retq
410;
411; AVX2-LABEL: insert_i32_two_elts_of_high_subvector:
412; AVX2:       # %bb.0:
413; AVX2-NEXT:    vmovd %edi, %xmm1
414; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
415; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
416; AVX2-NEXT:    retq
417  %i0 = insertelement <8 x i32> %x, i32 %s, i32 4
418  %i1 = insertelement <8 x i32> %i0, i32 %s, i32 5
419  ret <8 x i32> %i1
420}
421
; Inserts the same i64 %s into elements 2 and 3 of <4 x i64> %x (two chained
; insertelements), i.e. overwrites the whole high half, and returns the result.
; The +avx run expects vmovq + vpinsrq $1 to build the new half, then
; vinsertf128; the +avx2 run expects vmovq + vpbroadcastq plus a vpblendd of the
; entire high half.
422define <4 x i64> @insert_i64_two_elts_of_high_subvector(<4 x i64> %x, i64 %s) {
423; AVX-LABEL: insert_i64_two_elts_of_high_subvector:
424; AVX:       # %bb.0:
425; AVX-NEXT:    vmovq %rdi, %xmm1
426; AVX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
427; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
428; AVX-NEXT:    retq
429;
430; AVX2-LABEL: insert_i64_two_elts_of_high_subvector:
431; AVX2:       # %bb.0:
432; AVX2-NEXT:    vmovq %rdi, %xmm1
433; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
434; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
435; AVX2-NEXT:    retq
436  %i0 = insertelement <4 x i64> %x, i64 %s, i32 2
437  %i1 = insertelement <4 x i64> %i0, i64 %s, i32 3
438  ret <4 x i64> %i1
439}
440
441; element insertion into two elements of low subvector
442
; Inserts the same float %s into elements 0 and 1 of <8 x float> %x (two chained
; insertelements) and returns the result.
; Both RUN configurations share the ALL prefix: an xmm vshufps places the scalar
; in the two low lanes, then a vblendps puts that low half back over ymm0.
443define <8 x float> @insert_f32_two_elts_of_low_subvector(<8 x float> %x, float %s) {
444; ALL-LABEL: insert_f32_two_elts_of_low_subvector:
445; ALL:       # %bb.0:
446; ALL-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
447; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
448; ALL-NEXT:    retq
449  %i0 = insertelement <8 x float> %x, float %s, i32 0
450  %i1 = insertelement <8 x float> %i0, float %s, i32 1
451  ret <8 x float> %i1
452}
453
; Inserts the same double %s into elements 0 and 1 of <4 x double> %x (two
; chained insertelements), i.e. overwrites the whole low half, and returns the
; result.
; Both RUN configurations share the ALL prefix: vmovddup duplicates the scalar,
; then a vblendps replaces the low half of ymm0.
454define <4 x double> @insert_f64_two_elts_of_low_subvector(<4 x double> %x, double %s) {
455; ALL-LABEL: insert_f64_two_elts_of_low_subvector:
456; ALL:       # %bb.0:
457; ALL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
458; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
459; ALL-NEXT:    retq
460  %i0 = insertelement <4 x double> %x, double %s, i32 0
461  %i1 = insertelement <4 x double> %i0, double %s, i32 1
462  ret <4 x double> %i1
463}
464
; Inserts the same i8 %s into elements 0 and 1 of <32 x i8> %x (two chained
; insertelements) and returns the result.
; Both runs expect vpinsrb $0 and vpinsrb $1 on the low xmm followed by a blend
; of the low half over ymm0 (vblendps for +avx, vpblendd for +avx2).
465define <32 x i8> @insert_i8_two_elts_of_low_subvector(<32 x i8> %x, i8 %s) {
466; AVX-LABEL: insert_i8_two_elts_of_low_subvector:
467; AVX:       # %bb.0:
468; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
469; AVX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
470; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
471; AVX-NEXT:    retq
472;
473; AVX2-LABEL: insert_i8_two_elts_of_low_subvector:
474; AVX2:       # %bb.0:
475; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
476; AVX2-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
477; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
478; AVX2-NEXT:    retq
479  %i0 = insertelement <32 x i8> %x, i8 %s, i32 0
480  %i1 = insertelement <32 x i8> %i0, i8 %s, i32 1
481  ret <32 x i8> %i1
482}
483
; Inserts the same i16 %s into elements 0 and 1 of <16 x i16> %x (two chained
; insertelements) and returns the result.
; Both runs expect vpinsrw $0 and vpinsrw $1 on the low xmm followed by a blend
; of the low half over ymm0 (vblendps for +avx, vpblendd for +avx2).
484define <16 x i16> @insert_i16_two_elts_of_low_subvector(<16 x i16> %x, i16 %s) {
485; AVX-LABEL: insert_i16_two_elts_of_low_subvector:
486; AVX:       # %bb.0:
487; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
488; AVX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
489; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
490; AVX-NEXT:    retq
491;
492; AVX2-LABEL: insert_i16_two_elts_of_low_subvector:
493; AVX2:       # %bb.0:
494; AVX2-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
495; AVX2-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
496; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
497; AVX2-NEXT:    retq
498  %i0 = insertelement <16 x i16> %x, i16 %s, i32 0
499  %i1 = insertelement <16 x i16> %i0, i16 %s, i32 1
500  ret <16 x i16> %i1
501}
502
; Inserts the same i32 %s into elements 0 and 1 of <8 x i32> %x (two chained
; insertelements) and returns the result.
; The +avx run expects vpinsrd $0 and $1 on the low xmm plus a vblendps of the
; low half; the +avx2 run expects vmovd, an xmm vpblendd of lane 0, vpinsrd $1,
; and a ymm vpblendd of the low half.
503define <8 x i32> @insert_i32_two_elts_of_low_subvector(<8 x i32> %x, i32 %s) {
504; AVX-LABEL: insert_i32_two_elts_of_low_subvector:
505; AVX:       # %bb.0:
506; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm1
507; AVX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
508; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
509; AVX-NEXT:    retq
510;
511; AVX2-LABEL: insert_i32_two_elts_of_low_subvector:
512; AVX2:       # %bb.0:
513; AVX2-NEXT:    vmovd %edi, %xmm1
514; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
515; AVX2-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
516; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
517; AVX2-NEXT:    retq
518  %i0 = insertelement <8 x i32> %x, i32 %s, i32 0
519  %i1 = insertelement <8 x i32> %i0, i32 %s, i32 1
520  ret <8 x i32> %i1
521}
522
; Inserts the same i64 %s into elements 0 and 1 of <4 x i64> %x (two chained
; insertelements), i.e. overwrites the whole low half, and returns the result.
; Both runs expect vmovq + vpinsrq $1 to build the new half, followed by a blend
; of that half over ymm0 (vblendps for +avx, vpblendd for +avx2).
523define <4 x i64> @insert_i64_two_elts_of_low_subvector(<4 x i64> %x, i64 %s) {
524; AVX-LABEL: insert_i64_two_elts_of_low_subvector:
525; AVX:       # %bb.0:
526; AVX-NEXT:    vmovq %rdi, %xmm1
527; AVX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
528; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
529; AVX-NEXT:    retq
530;
531; AVX2-LABEL: insert_i64_two_elts_of_low_subvector:
532; AVX2:       # %bb.0:
533; AVX2-NEXT:    vmovq %rdi, %xmm1
534; AVX2-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
535; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
536; AVX2-NEXT:    retq
537  %i0 = insertelement <4 x i64> %x, i64 %s, i32 0
538  %i1 = insertelement <4 x i64> %i0, i64 %s, i32 1
539  ret <4 x i64> %i1
540}
541