; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2

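; These tests exercise lowering of insertelement-of-zero patterns: inserting
; constant zero into a vector lane should lower to a cheap blend, scalar
; move, or constant and-mask rather than a generic insert sequence.

; Zero lane 0 of <2 x double>: pre-SSE4.1 targets use xorpd + movsd;
; SSE4.1 and AVX use a zeroed register and a single blendps.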
define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}

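; Zero lanes 1 and 2 of <4 x double>: SSE handles each 128-bit half
; separately (movq zeroes the top of xmm0, movsd zeroes the bottom of xmm1);
; AVX folds both inserts into one 256-bit vblendps against zero.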
define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}

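; Zero lane 0 of <2 x i64>: the integer insert lowers to the same FP-domain
; xor + movsd/blendps sequence as the <2 x double> case above.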
define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2i64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}

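; Zero lane 2 of <4 x i64>: SSE only needs to touch the high half (xmm1);
; AVX blends the full ymm register against zero.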
define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i64_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}

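; Zero lane 2 of <4 x float>: without SSE4.1 blends this takes a pair of
; shufps shuffles; SSE4.1 and AVX need only a single blendps.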
define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}

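; Zero lanes 0 and 6 of <8 x float>: SSE uses movss for lane 0 and a shufps
; pair for lane 6; SSE4.1 uses two blendps; AVX covers both lanes in one
; ymm vblendps.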
define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}

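; Zero lane 2 of <4 x i32>: same lowering as the <4 x float> case above.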
define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}

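; Zero lanes 0 and 6 of <8 x i32>: pre-SSE4.1 targets fold both zeroed lanes
; into constant-pool and-masks; SSE4.1 blends against a zeroed register;
; AVX uses a single ymm vblendps.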
define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}

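; Zero lanes 0 and 6 of <8 x i16>: pre-SSE4.1 uses a constant and-mask;
; SSE4.1 and AVX use pxor + pblendw.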
define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}

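; Zero lanes 0, 6 and 15 of <16 x i16>: SSE4.1 needs a pblendw per 128-bit
; half; under AVX the whole pattern becomes one 256-bit and with a constant
; mask.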
define <16 x i16> @insert_v16i16_z12345z789ABCDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i16_z12345z789ABCDEz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}

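; Zero bytes 0 and 15 of <16 x i8>: word-granular pblendw cannot isolate a
; single byte, so every subtarget uses a constant and-mask instead.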
define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) {
; SSE-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}

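; Zero bytes 0, 15, 30 and 31 of <32 x i8>: SSE4.1 can pblendw the aligned
; byte pair 30-31 (word 7 of xmm1) but still needs an and-mask for the lone
; bytes 0 and 15; AVX uses a single 256-bit and-mask.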
define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}

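; PR41512: building a vector from two zero-extended scalars. movd already
; zeroes the upper bits of the destination register, so a single punpcklqdq
; combines the two halves with no explicit zeroing.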
define <4 x i32> @PR41512(i32 %x, i32 %y) {
; SSE-LABEL: PR41512:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %edi, %xmm0
; AVX-NEXT:    vmovd %esi, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}

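; The <4 x i64> variant: movq zero-extends each scalar; SSE returns the two
; xmm halves directly, while AVX recombines them with vinsertf128 /
; vinserti128.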
define <4 x i64> @PR41512_v4i64(i64 %x, i64 %y) {
; SSE-LABEL: PR41512_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    movq %rsi, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR41512_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vmovq %rsi, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR41512_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vmovq %rsi, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %ins1 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %x, i32 0
  %ins2 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %y, i32 0
  %r = shufflevector <4 x i64> %ins1, <4 x i64> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %r
}

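; The float variant starts from scalars already in xmm registers whose upper
; lanes are undefined, so the zeros must be materialized explicitly with
; xorps plus movss (pre-SSE4.1) or blendps.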
define <8 x float> @PR41512_v8f32(float %x, float %y) {
; SSE2-LABEL: PR41512_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    xorps %xmm3, %xmm3
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT:    movaps %xmm3, %xmm0
; SSE2-NEXT:    movaps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: PR41512_v8f32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    xorps %xmm3, %xmm3
; SSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE3-NEXT:    movaps %xmm3, %xmm0
; SSE3-NEXT:    movaps %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: PR41512_v8f32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    xorps %xmm3, %xmm3
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSSE3-NEXT:    movaps %xmm3, %xmm0
; SSSE3-NEXT:    movaps %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: PR41512_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: PR41512_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0
  %ins2 = insertelement <8 x float> zeroinitializer, float %y, i32 0
  %r = shufflevector <8 x float> %ins1, <8 x float> %ins2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %r
}

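; When the scalars are loaded from memory, movss itself zeroes the upper
; lanes, so movlhps can concatenate the two loads with no extra zeroing.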
define <4 x i32> @PR41512_loads(ptr %p1, ptr %p2) {
; SSE-LABEL: PR41512_loads:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512_loads:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %x = load i32, ptr %p1
  %y = load i32, ptr %p2
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}