xref: /llvm-project/llvm/test/CodeGen/X86/buildvec-insertvec.ll (revision 122874c955e06defb619b1afd4e26db482dbbf19)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
6
; fptoui <3 x float> to <3 x i8>, widened to <4 x i8> with constant 255 in lane 3,
; then stored as a single 32-bit value.
7define void @foo(<3 x float> %in, ptr nocapture %out) nounwind {
8; SSE2-LABEL: foo:
9; SSE2:       # %bb.0:
10; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
11; SSE2-NEXT:    packuswb %xmm0, %xmm0
12; SSE2-NEXT:    packuswb %xmm0, %xmm0
13; SSE2-NEXT:    movd %xmm0, %eax
14; SSE2-NEXT:    orl $-16777216, %eax # imm = 0xFF000000
15; SSE2-NEXT:    movl %eax, (%rdi)
16; SSE2-NEXT:    retq
17;
18; SSE41-LABEL: foo:
19; SSE41:       # %bb.0:
20; SSE41-NEXT:    cvttps2dq %xmm0, %xmm0
21; SSE41-NEXT:    packusdw %xmm0, %xmm0
22; SSE41-NEXT:    packuswb %xmm0, %xmm0
23; SSE41-NEXT:    movl $255, %eax
24; SSE41-NEXT:    pinsrb $3, %eax, %xmm0
25; SSE41-NEXT:    movd %xmm0, (%rdi)
26; SSE41-NEXT:    retq
27;
28; AVX-LABEL: foo:
29; AVX:       # %bb.0:
30; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
31; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
32; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
33; AVX-NEXT:    movl $255, %eax
34; AVX-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
35; AVX-NEXT:    vmovd %xmm0, (%rdi)
36; AVX-NEXT:    retq
37  %t0 = fptoui <3 x float> %in to <3 x i8>
38  %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
39  %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
40  store <4 x i8> %t2, ptr %out, align 4
41  ret void
42}
43
44; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a
45; blend with a zero vector if the build_vector contains negative zero.
46
; Lane 1 is inserted as -0.0 (must be materialized), lane 3 as +0.0 (may be a zero blend).
47define <4 x float> @test_negative_zero_1(<4 x float> %A) {
48; SSE2-LABEL: test_negative_zero_1:
49; SSE2:       # %bb.0: # %entry
50; SSE2-NEXT:    xorps %xmm1, %xmm1
51; SSE2-NEXT:    movaps %xmm0, %xmm2
52; SSE2-NEXT:    unpckhps {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
53; SSE2-NEXT:    movss {{.*#+}} xmm1 = [-0.0E+0,0.0E+0,0.0E+0,0.0E+0]
54; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
55; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
56; SSE2-NEXT:    retq
57;
58; SSE41-LABEL: test_negative_zero_1:
59; SSE41:       # %bb.0: # %entry
60; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2],zero
61; SSE41-NEXT:    retq
62;
63; AVX-LABEL: test_negative_zero_1:
64; AVX:       # %bb.0: # %entry
65; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2],zero
66; AVX-NEXT:    retq
67entry:
68  %0 = extractelement <4 x float> %A, i32 0
69  %1 = insertelement <4 x float> undef, float %0, i32 0
70  %2 = insertelement <4 x float> %1, float -0.0, i32 1
71  %3 = extractelement <4 x float> %A, i32 2
72  %4 = insertelement <4 x float> %2, float %3, i32 2
73  %5 = insertelement <4 x float> %4, float 0.0, i32 3
74  ret <4 x float> %5
75}
76
77; FIXME: This could be 'movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]'.
78
; -0.0 inserted into lane 1 of a <2 x double>; must not be folded as if it were +0.0.
79define <2 x double> @test_negative_zero_2(<2 x double> %A) {
80; SSE2-LABEL: test_negative_zero_2:
81; SSE2:       # %bb.0: # %entry
82; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
83; SSE2-NEXT:    retq
84;
85; SSE41-LABEL: test_negative_zero_2:
86; SSE41:       # %bb.0: # %entry
87; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
88; SSE41-NEXT:    retq
89;
90; AVX-LABEL: test_negative_zero_2:
91; AVX:       # %bb.0: # %entry
92; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
93; AVX-NEXT:    retq
94entry:
95  %0 = extractelement <2 x double> %A, i32 0
96  %1 = insertelement <2 x double> undef, double %0, i32 0
97  %2 = insertelement <2 x double> %1, double -0.0, i32 1
98  ret <2 x double> %2
99}
100
; Build a <4 x float> where all four lanes come from register arguments.
101define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) {
102; SSE2-LABEL: test_buildvector_v4f32_register:
103; SSE2:       # %bb.0:
104; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
105; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
106; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
107; SSE2-NEXT:    retq
108;
109; SSE41-LABEL: test_buildvector_v4f32_register:
110; SSE41:       # %bb.0:
111; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
112; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
113; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
114; SSE41-NEXT:    retq
115;
116; AVX-LABEL: test_buildvector_v4f32_register:
117; AVX:       # %bb.0:
118; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
119; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
120; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
121; AVX-NEXT:    retq
122  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
123  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
124  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
125  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
126  ret <4 x float> %ins3
127}
128
; Build a <4 x float> where all four lanes are loaded from memory.
129define <4 x float> @test_buildvector_v4f32_load(ptr %p0, ptr %p1, ptr %p2, ptr %p3) {
130; SSE2-LABEL: test_buildvector_v4f32_load:
131; SSE2:       # %bb.0:
132; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
133; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
134; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
135; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
136; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
137; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
138; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
139; SSE2-NEXT:    retq
140;
141; SSE41-LABEL: test_buildvector_v4f32_load:
142; SSE41:       # %bb.0:
143; SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
144; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
145; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
146; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
147; SSE41-NEXT:    retq
148;
149; AVX-LABEL: test_buildvector_v4f32_load:
150; AVX:       # %bb.0:
151; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
152; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
153; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
154; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
155; AVX-NEXT:    retq
156  %f0 = load float, ptr %p0, align 4
157  %f1 = load float, ptr %p1, align 4
158  %f2 = load float, ptr %p2, align 4
159  %f3 = load float, ptr %p3, align 4
160  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
161  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
162  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
163  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
164  ret <4 x float> %ins3
165}
166
; Build a <4 x float> from three register lanes plus one loaded lane (lane 3).
167define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, ptr %p3) {
168; SSE2-LABEL: test_buildvector_v4f32_partial_load:
169; SSE2:       # %bb.0:
170; SSE2-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
171; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
172; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
173; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
174; SSE2-NEXT:    retq
175;
176; SSE41-LABEL: test_buildvector_v4f32_partial_load:
177; SSE41:       # %bb.0:
178; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
179; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
180; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
181; SSE41-NEXT:    retq
182;
183; AVX-LABEL: test_buildvector_v4f32_partial_load:
184; AVX:       # %bb.0:
185; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
186; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
187; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
188; AVX-NEXT:    retq
189  %f3 = load float, ptr %p3, align 4
190  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
191  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
192  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
193  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
194  ret <4 x float> %ins3
195}
196
; Build a <4 x i32> where all four lanes come from register arguments.
197define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
198; SSE2-LABEL: test_buildvector_v4i32_register:
199; SSE2:       # %bb.0:
200; SSE2-NEXT:    movd %ecx, %xmm0
201; SSE2-NEXT:    movd %edx, %xmm1
202; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
203; SSE2-NEXT:    movd %esi, %xmm2
204; SSE2-NEXT:    movd %edi, %xmm0
205; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
206; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
207; SSE2-NEXT:    retq
208;
209; SSE41-LABEL: test_buildvector_v4i32_register:
210; SSE41:       # %bb.0:
211; SSE41-NEXT:    movd %edi, %xmm0
212; SSE41-NEXT:    pinsrd $1, %esi, %xmm0
213; SSE41-NEXT:    pinsrd $2, %edx, %xmm0
214; SSE41-NEXT:    pinsrd $3, %ecx, %xmm0
215; SSE41-NEXT:    retq
216;
217; AVX-LABEL: test_buildvector_v4i32_register:
218; AVX:       # %bb.0:
219; AVX-NEXT:    vmovd %edi, %xmm0
220; AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
221; AVX-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
222; AVX-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
223; AVX-NEXT:    retq
224  %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
225  %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
226  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
227  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
228  ret <4 x i32> %ins3
229}
230
; Build a <4 x i32> with lanes 1 and 2 undef; only lanes 0 and 3 are defined.
231define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) {
232; SSE2-LABEL: test_buildvector_v4i32_partial:
233; SSE2:       # %bb.0:
234; SSE2-NEXT:    movd %edi, %xmm0
235; SSE2-NEXT:    movd %esi, %xmm1
236; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
237; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
238; SSE2-NEXT:    retq
239;
240; SSE41-LABEL: test_buildvector_v4i32_partial:
241; SSE41:       # %bb.0:
242; SSE41-NEXT:    movd %edi, %xmm0
243; SSE41-NEXT:    pinsrd $3, %esi, %xmm0
244; SSE41-NEXT:    retq
245;
246; AVX-LABEL: test_buildvector_v4i32_partial:
247; AVX:       # %bb.0:
248; AVX-NEXT:    vmovd %edi, %xmm0
249; AVX-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
250; AVX-NEXT:    retq
251  %ins0 = insertelement <4 x i32> undef, i32   %a0, i32 0
252  %ins1 = insertelement <4 x i32> %ins0, i32 undef, i32 1
253  %ins2 = insertelement <4 x i32> %ins1, i32 undef, i32 2
254  %ins3 = insertelement <4 x i32> %ins2, i32   %a3, i32 3
255  ret <4 x i32> %ins3
256}
257
; Build a <4 x i32> with lane 1 constant zero and the remaining lanes from registers.
258define <4 x i32> @test_buildvector_v4i32_register_zero(i32 %a0, i32 %a2, i32 %a3) {
259; SSE-LABEL: test_buildvector_v4i32_register_zero:
260; SSE:       # %bb.0:
261; SSE-NEXT:    movd %edx, %xmm0
262; SSE-NEXT:    movd %esi, %xmm1
263; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
264; SSE-NEXT:    movd %edi, %xmm0
265; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
266; SSE-NEXT:    retq
267;
268; AVX-LABEL: test_buildvector_v4i32_register_zero:
269; AVX:       # %bb.0:
270; AVX-NEXT:    vmovd %edx, %xmm0
271; AVX-NEXT:    vmovd %esi, %xmm1
272; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
273; AVX-NEXT:    vmovd %edi, %xmm1
274; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
275; AVX-NEXT:    retq
276  %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
277  %ins1 = insertelement <4 x i32> %ins0, i32   0, i32 1
278  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
279  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
280  ret <4 x i32> %ins3
281}
282
; Build a <4 x i32> with lane 0 constant zero and lanes 1-3 from registers.
283define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 %a3) {
284; SSE-LABEL: test_buildvector_v4i32_register_zero_2:
285; SSE:       # %bb.0:
286; SSE-NEXT:    movd %edx, %xmm0
287; SSE-NEXT:    movd %esi, %xmm1
288; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
289; SSE-NEXT:    movd %edi, %xmm0
290; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,1]
291; SSE-NEXT:    retq
292;
293; AVX-LABEL: test_buildvector_v4i32_register_zero_2:
294; AVX:       # %bb.0:
295; AVX-NEXT:    vmovd %edx, %xmm0
296; AVX-NEXT:    vmovd %esi, %xmm1
297; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
298; AVX-NEXT:    vmovd %edi, %xmm1
299; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[1,0],xmm0[0,1]
300; AVX-NEXT:    retq
301  %ins0 = insertelement <4 x i32> undef, i32   0, i32 0
302  %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
303  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
304  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
305  ret <4 x i32> %ins3
306}
307
; Build an <8 x i16> where all eight lanes come from register/stack arguments.
308define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
309; SSE2-LABEL: test_buildvector_v8i16_register:
310; SSE2:       # %bb.0:
311; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
312; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
313; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
314; SSE2-NEXT:    movd %r9d, %xmm0
315; SSE2-NEXT:    movd %r8d, %xmm2
316; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
317; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
318; SSE2-NEXT:    movd %ecx, %xmm0
319; SSE2-NEXT:    movd %edx, %xmm1
320; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
321; SSE2-NEXT:    movd %esi, %xmm3
322; SSE2-NEXT:    movd %edi, %xmm0
323; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
324; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
325; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
326; SSE2-NEXT:    retq
327;
328; SSE41-LABEL: test_buildvector_v8i16_register:
329; SSE41:       # %bb.0:
330; SSE41-NEXT:    movd %edi, %xmm0
331; SSE41-NEXT:    pinsrw $1, %esi, %xmm0
332; SSE41-NEXT:    pinsrw $2, %edx, %xmm0
333; SSE41-NEXT:    pinsrw $3, %ecx, %xmm0
334; SSE41-NEXT:    pinsrw $4, %r8d, %xmm0
335; SSE41-NEXT:    pinsrw $5, %r9d, %xmm0
336; SSE41-NEXT:    pinsrw $6, {{[0-9]+}}(%rsp), %xmm0
337; SSE41-NEXT:    pinsrw $7, {{[0-9]+}}(%rsp), %xmm0
338; SSE41-NEXT:    retq
339;
340; AVX-LABEL: test_buildvector_v8i16_register:
341; AVX:       # %bb.0:
342; AVX-NEXT:    vmovd %edi, %xmm0
343; AVX-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0
344; AVX-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0
345; AVX-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
346; AVX-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0
347; AVX-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0
348; AVX-NEXT:    vpinsrw $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
349; AVX-NEXT:    vpinsrw $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
350; AVX-NEXT:    retq
351  %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
352  %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
353  %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
354  %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
355  %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
356  %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
357  %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
358  %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
359  ret <8 x i16> %ins7
360}
361
; Build an <8 x i16> with lanes 0, 2, 6, 7 undef; the rest come from registers.
362define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
363; SSE-LABEL: test_buildvector_v8i16_partial:
364; SSE:       # %bb.0:
365; SSE-NEXT:    pxor %xmm0, %xmm0
366; SSE-NEXT:    pinsrw $1, %edi, %xmm0
367; SSE-NEXT:    pinsrw $3, %esi, %xmm0
368; SSE-NEXT:    pinsrw $4, %edx, %xmm0
369; SSE-NEXT:    pinsrw $5, %ecx, %xmm0
370; SSE-NEXT:    retq
371;
372; AVX-LABEL: test_buildvector_v8i16_partial:
373; AVX:       # %bb.0:
374; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
375; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
376; AVX-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
377; AVX-NEXT:    vpinsrw $4, %edx, %xmm0, %xmm0
378; AVX-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
379; AVX-NEXT:    retq
380  %ins0 = insertelement <8 x i16> undef, i16 undef, i32 0
381  %ins1 = insertelement <8 x i16> %ins0, i16   %a1, i32 1
382  %ins2 = insertelement <8 x i16> %ins1, i16 undef, i32 2
383  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
384  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
385  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
386  %ins6 = insertelement <8 x i16> %ins5, i16 undef, i32 6
387  %ins7 = insertelement <8 x i16> %ins6, i16 undef, i32 7
388  ret <8 x i16> %ins7
389}
390
; Build an <8 x i16> with lanes 1, 2, 6, 7 constant zero; lanes 0, 3, 4, 5 from registers.
391define <8 x i16> @test_buildvector_v8i16_register_zero(i16 %a0, i16 %a3, i16 %a4, i16 %a5) {
392; SSE-LABEL: test_buildvector_v8i16_register_zero:
393; SSE:       # %bb.0:
394; SSE-NEXT:    movzwl %di, %eax
395; SSE-NEXT:    movd %eax, %xmm0
396; SSE-NEXT:    pinsrw $3, %esi, %xmm0
397; SSE-NEXT:    pinsrw $4, %edx, %xmm0
398; SSE-NEXT:    pinsrw $5, %ecx, %xmm0
399; SSE-NEXT:    retq
400;
401; AVX-LABEL: test_buildvector_v8i16_register_zero:
402; AVX:       # %bb.0:
403; AVX-NEXT:    movzwl %di, %eax
404; AVX-NEXT:    vmovd %eax, %xmm0
405; AVX-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
406; AVX-NEXT:    vpinsrw $4, %edx, %xmm0, %xmm0
407; AVX-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
408; AVX-NEXT:    retq
409  %ins0 = insertelement <8 x i16> undef, i16   %a0, i32 0
410  %ins1 = insertelement <8 x i16> %ins0, i16     0, i32 1
411  %ins2 = insertelement <8 x i16> %ins1, i16     0, i32 2
412  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
413  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
414  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
415  %ins6 = insertelement <8 x i16> %ins5, i16     0, i32 6
416  %ins7 = insertelement <8 x i16> %ins6, i16     0, i32 7
417  ret <8 x i16> %ins7
418}
419
; Build an <8 x i16> with lanes 0, 2, 6, 7 constant zero; lanes 1, 3, 4, 5 from registers.
420define <8 x i16> @test_buildvector_v8i16_register_zero_2(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
421; SSE-LABEL: test_buildvector_v8i16_register_zero_2:
422; SSE:       # %bb.0:
423; SSE-NEXT:    pxor %xmm0, %xmm0
424; SSE-NEXT:    pinsrw $1, %edi, %xmm0
425; SSE-NEXT:    pinsrw $3, %esi, %xmm0
426; SSE-NEXT:    pinsrw $4, %edx, %xmm0
427; SSE-NEXT:    pinsrw $5, %ecx, %xmm0
428; SSE-NEXT:    retq
429;
430; AVX-LABEL: test_buildvector_v8i16_register_zero_2:
431; AVX:       # %bb.0:
432; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
433; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
434; AVX-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
435; AVX-NEXT:    vpinsrw $4, %edx, %xmm0, %xmm0
436; AVX-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
437; AVX-NEXT:    retq
438  %ins0 = insertelement <8 x i16> undef, i16     0, i32 0
439  %ins1 = insertelement <8 x i16> %ins0, i16   %a1, i32 1
440  %ins2 = insertelement <8 x i16> %ins1, i16     0, i32 2
441  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
442  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
443  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
444  %ins6 = insertelement <8 x i16> %ins5, i16     0, i32 6
445  %ins7 = insertelement <8 x i16> %ins6, i16     0, i32 7
446  ret <8 x i16> %ins7
447}
448
; Build a <16 x i8> where all sixteen lanes come from register/stack arguments.
449define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
450; SSE2-LABEL: test_buildvector_v16i8_register:
451; SSE2:       # %bb.0:
452; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
453; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
454; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
455; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
456; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
457; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
458; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
459; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
460; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
461; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
462; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
463; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
464; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
465; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
466; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
467; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
468; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
469; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
470; SSE2-NEXT:    movd %r9d, %xmm0
471; SSE2-NEXT:    movd %r8d, %xmm2
472; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
473; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
474; SSE2-NEXT:    movd %ecx, %xmm0
475; SSE2-NEXT:    movd %edx, %xmm1
476; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
477; SSE2-NEXT:    movd %esi, %xmm4
478; SSE2-NEXT:    movd %edi, %xmm0
479; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
480; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
481; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
482; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
483; SSE2-NEXT:    retq
484;
485; SSE41-LABEL: test_buildvector_v16i8_register:
486; SSE41:       # %bb.0:
487; SSE41-NEXT:    movd %edi, %xmm0
488; SSE41-NEXT:    pinsrb $1, %esi, %xmm0
489; SSE41-NEXT:    pinsrb $2, %edx, %xmm0
490; SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
491; SSE41-NEXT:    pinsrb $4, %r8d, %xmm0
492; SSE41-NEXT:    pinsrb $5, %r9d, %xmm0
493; SSE41-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm0
494; SSE41-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm0
495; SSE41-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm0
496; SSE41-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm0
497; SSE41-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm0
498; SSE41-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm0
499; SSE41-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm0
500; SSE41-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm0
501; SSE41-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm0
502; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
503; SSE41-NEXT:    retq
504;
505; AVX-LABEL: test_buildvector_v16i8_register:
506; AVX:       # %bb.0:
507; AVX-NEXT:    vmovd %edi, %xmm0
508; AVX-NEXT:    vpinsrb $1, %esi, %xmm0, %xmm0
509; AVX-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
510; AVX-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
511; AVX-NEXT:    vpinsrb $4, %r8d, %xmm0, %xmm0
512; AVX-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
513; AVX-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
514; AVX-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
515; AVX-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0
516; AVX-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0
517; AVX-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0
518; AVX-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0
519; AVX-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0
520; AVX-NEXT:    vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0
521; AVX-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0
522; AVX-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
523; AVX-NEXT:    retq
524  %ins0  = insertelement <16 x i8> undef,  i8 %a0,  i32 0
525  %ins1  = insertelement <16 x i8> %ins0,  i8 %a1,  i32 1
526  %ins2  = insertelement <16 x i8> %ins1,  i8 %a2,  i32 2
527  %ins3  = insertelement <16 x i8> %ins2,  i8 %a3,  i32 3
528  %ins4  = insertelement <16 x i8> %ins3,  i8 %a4,  i32 4
529  %ins5  = insertelement <16 x i8> %ins4,  i8 %a5,  i32 5
530  %ins6  = insertelement <16 x i8> %ins5,  i8 %a6,  i32 6
531  %ins7  = insertelement <16 x i8> %ins6,  i8 %a7,  i32 7
532  %ins8  = insertelement <16 x i8> %ins7,  i8 %a8,  i32 8
533  %ins9  = insertelement <16 x i8> %ins8,  i8 %a9,  i32 9
534  %ins10 = insertelement <16 x i8> %ins9,  i8 %a10, i32 10
535  %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
536  %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
537  %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
538  %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
539  %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
540  ret <16 x i8> %ins15
541}
542
; Build a <16 x i8> where only lanes 2, 6, 8, 11, 12, 15 are defined; the rest are undef.
543define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
544; SSE2-LABEL: test_buildvector_v16i8_partial:
545; SSE2:       # %bb.0:
546; SSE2-NEXT:    pxor %xmm0, %xmm0
547; SSE2-NEXT:    pinsrw $1, %edi, %xmm0
548; SSE2-NEXT:    pinsrw $3, %esi, %xmm0
549; SSE2-NEXT:    pinsrw $4, %edx, %xmm0
550; SSE2-NEXT:    shll $8, %ecx
551; SSE2-NEXT:    pinsrw $5, %ecx, %xmm0
552; SSE2-NEXT:    pinsrw $6, %r8d, %xmm0
553; SSE2-NEXT:    shll $8, %r9d
554; SSE2-NEXT:    pinsrw $7, %r9d, %xmm0
555; SSE2-NEXT:    retq
556;
557; SSE41-LABEL: test_buildvector_v16i8_partial:
558; SSE41:       # %bb.0:
559; SSE41-NEXT:    pxor %xmm0, %xmm0
560; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
561; SSE41-NEXT:    pinsrb $6, %esi, %xmm0
562; SSE41-NEXT:    pinsrb $8, %edx, %xmm0
563; SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
564; SSE41-NEXT:    pinsrb $12, %r8d, %xmm0
565; SSE41-NEXT:    pinsrb $15, %r9d, %xmm0
566; SSE41-NEXT:    retq
567;
568; AVX-LABEL: test_buildvector_v16i8_partial:
569; AVX:       # %bb.0:
570; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
571; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
572; AVX-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0
573; AVX-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
574; AVX-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
575; AVX-NEXT:    vpinsrb $12, %r8d, %xmm0, %xmm0
576; AVX-NEXT:    vpinsrb $15, %r9d, %xmm0, %xmm0
577; AVX-NEXT:    retq
578  %ins0  = insertelement <16 x i8> undef,  i8 undef, i32 0
579  %ins1  = insertelement <16 x i8> %ins0,  i8 undef, i32 1
580  %ins2  = insertelement <16 x i8> %ins1,  i8   %a2, i32 2
581  %ins3  = insertelement <16 x i8> %ins2,  i8 undef, i32 3
582  %ins4  = insertelement <16 x i8> %ins3,  i8 undef, i32 4
583  %ins5  = insertelement <16 x i8> %ins4,  i8 undef, i32 5
584  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
585  %ins7  = insertelement <16 x i8> %ins6,  i8 undef, i32 7
586  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
587  %ins9  = insertelement <16 x i8> %ins8,  i8 undef, i32 9
588  %ins10 = insertelement <16 x i8> %ins9,  i8 undef, i32 10
589  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
590  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
591  %ins13 = insertelement <16 x i8> %ins12, i8 undef, i32 13
592  %ins14 = insertelement <16 x i8> %ins13, i8 undef, i32 14
593  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
594  ret <16 x i8> %ins15
595}
596
; Build a <16 x i8> with lanes 0, 4, 6, 8, 11, 12, 15 from arguments and all other lanes constant zero.
597define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
598; SSE2-LABEL: test_buildvector_v16i8_register_zero:
599; SSE2:       # %bb.0:
600; SSE2-NEXT:    movzbl %sil, %eax
601; SSE2-NEXT:    movzbl %dil, %esi
602; SSE2-NEXT:    movd %esi, %xmm0
603; SSE2-NEXT:    pinsrw $2, %eax, %xmm0
604; SSE2-NEXT:    movzbl %dl, %eax
605; SSE2-NEXT:    pinsrw $3, %eax, %xmm0
606; SSE2-NEXT:    movzbl %cl, %eax
607; SSE2-NEXT:    pinsrw $4, %eax, %xmm0
608; SSE2-NEXT:    shll $8, %r8d
609; SSE2-NEXT:    pinsrw $5, %r8d, %xmm0
610; SSE2-NEXT:    movzbl %r9b, %eax
611; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
612; SSE2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
613; SSE2-NEXT:    shll $8, %eax
614; SSE2-NEXT:    pinsrw $7, %eax, %xmm0
615; SSE2-NEXT:    retq
616;
617; SSE41-LABEL: test_buildvector_v16i8_register_zero:
618; SSE41:       # %bb.0:
619; SSE41-NEXT:    movzbl %dil, %eax
620; SSE41-NEXT:    movd %eax, %xmm0
621; SSE41-NEXT:    pinsrb $4, %esi, %xmm0
622; SSE41-NEXT:    pinsrb $6, %edx, %xmm0
623; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
624; SSE41-NEXT:    pinsrb $11, %r8d, %xmm0
625; SSE41-NEXT:    pinsrb $12, %r9d, %xmm0
626; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
627; SSE41-NEXT:    retq
628;
629; AVX-LABEL: test_buildvector_v16i8_register_zero:
630; AVX:       # %bb.0:
631; AVX-NEXT:    movzbl %dil, %eax
632; AVX-NEXT:    vmovd %eax, %xmm0
633; AVX-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0
634; AVX-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
635; AVX-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
636; AVX-NEXT:    vpinsrb $11, %r8d, %xmm0, %xmm0
637; AVX-NEXT:    vpinsrb $12, %r9d, %xmm0, %xmm0
638; AVX-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
639; AVX-NEXT:    retq
640  %ins0  = insertelement <16 x i8> undef,  i8   %a0, i32 0
641  %ins1  = insertelement <16 x i8> %ins0,  i8     0, i32 1
642  %ins2  = insertelement <16 x i8> %ins1,  i8     0, i32 2
643  %ins3  = insertelement <16 x i8> %ins2,  i8     0, i32 3
644  %ins4  = insertelement <16 x i8> %ins3,  i8   %a4, i32 4
645  %ins5  = insertelement <16 x i8> %ins4,  i8     0, i32 5
646  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
647  %ins7  = insertelement <16 x i8> %ins6,  i8     0, i32 7
648  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
649  %ins9  = insertelement <16 x i8> %ins8,  i8     0, i32 9
650  %ins10 = insertelement <16 x i8> %ins9,  i8     0, i32 10
651  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
652  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
653  %ins13 = insertelement <16 x i8> %ins12, i8     0, i32 13
654  %ins14 = insertelement <16 x i8> %ins13, i8     0, i32 14
655  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
656  ret <16 x i8> %ins15
657}
658
; Build a <16 x i8> with lanes 2, 3, 6, 8, 11, 12, 15 from arguments and all other lanes constant zero.
659define <16 x i8> @test_buildvector_v16i8_register_zero_2(i8 %a2, i8 %a3, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
660; SSE2-LABEL: test_buildvector_v16i8_register_zero_2:
661; SSE2:       # %bb.0:
662; SSE2-NEXT:    shll $8, %esi
663; SSE2-NEXT:    movzbl %dil, %eax
664; SSE2-NEXT:    orl %esi, %eax
665; SSE2-NEXT:    pxor %xmm0, %xmm0
666; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
667; SSE2-NEXT:    movzbl %dl, %eax
668; SSE2-NEXT:    pinsrw $3, %eax, %xmm0
669; SSE2-NEXT:    movzbl %cl, %eax
670; SSE2-NEXT:    pinsrw $4, %eax, %xmm0
671; SSE2-NEXT:    shll $8, %r8d
672; SSE2-NEXT:    pinsrw $5, %r8d, %xmm0
673; SSE2-NEXT:    movzbl %r9b, %eax
674; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
675; SSE2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
676; SSE2-NEXT:    shll $8, %eax
677; SSE2-NEXT:    pinsrw $7, %eax, %xmm0
678; SSE2-NEXT:    retq
679;
680; SSE41-LABEL: test_buildvector_v16i8_register_zero_2:
681; SSE41:       # %bb.0:
682; SSE41-NEXT:    pxor %xmm0, %xmm0
683; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
684; SSE41-NEXT:    pinsrb $3, %esi, %xmm0
685; SSE41-NEXT:    pinsrb $6, %edx, %xmm0
686; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
687; SSE41-NEXT:    pinsrb $11, %r8d, %xmm0
688; SSE41-NEXT:    pinsrb $12, %r9d, %xmm0
689; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
690; SSE41-NEXT:    retq
691;
692; AVX-LABEL: test_buildvector_v16i8_register_zero_2:
693; AVX:       # %bb.0:
694; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
695; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
696; AVX-NEXT:    vpinsrb $3, %esi, %xmm0, %xmm0
697; AVX-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
698; AVX-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
699; AVX-NEXT:    vpinsrb $11, %r8d, %xmm0, %xmm0
700; AVX-NEXT:    vpinsrb $12, %r9d, %xmm0, %xmm0
701; AVX-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
702; AVX-NEXT:    retq
703  %ins0  = insertelement <16 x i8> undef,  i8     0, i32 0
704  %ins1  = insertelement <16 x i8> %ins0,  i8     0, i32 1
705  %ins2  = insertelement <16 x i8> %ins1,  i8   %a2, i32 2
706  %ins3  = insertelement <16 x i8> %ins2,  i8   %a3, i32 3
707  %ins4  = insertelement <16 x i8> %ins3,  i8     0, i32 4
708  %ins5  = insertelement <16 x i8> %ins4,  i8     0, i32 5
709  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
710  %ins7  = insertelement <16 x i8> %ins6,  i8     0, i32 7
711  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
712  %ins9  = insertelement <16 x i8> %ins8,  i8     0, i32 9
713  %ins10 = insertelement <16 x i8> %ins9,  i8     0, i32 10
714  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
715  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
716  %ins13 = insertelement <16 x i8> %ins12, i8     0, i32 13
717  %ins14 = insertelement <16 x i8> %ins13, i8     0, i32 14
718  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
719  ret <16 x i8> %ins15
720}
721
722; PR46461 - Don't let reduceBuildVecExtToExtBuildVec break splat(zero_extend) patterns,
723; resulting in the BUILD_VECTOR lowering to individual insertions into a zero vector.
724
define void @PR46461(i16 %x, ptr %y) {
; SSE-LABEL: PR46461:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl %di, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    movdqa %xmm0, 48(%rsi)
; SSE-NEXT:    movdqa %xmm0, 32(%rsi)
; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR46461:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl %di, %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, 32(%rsi)
; AVX1-NEXT:    vmovaps %ymm0, (%rsi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR46461:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movzwl %di, %eax
; AVX2-NEXT:    shrl %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, 32(%rsi)
; AVX2-NEXT:    vmovdqa %ymm0, (%rsi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
; Splat zext(%x >> 1) across all sixteen i32 lanes and store 64 bytes. The
; splat(zero_extend) must survive as a single scalar-to-vector broadcast (or
; shuffle+shift), not degrade into per-element inserts into a zero vector.
  %z = lshr i16 %x, 1
  %a = zext i16 %z to i32
  %b = insertelement <16 x i32> undef, i32 %a, i32 0
  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %c, ptr %y
  ret void
}
767
768; OSS-Fuzz #5688
769; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5688
define <4 x i32> @ossfuzz5688(i32 %a0) {
; CHECK-LABEL: ossfuzz5688:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
; Fuzzer-reduced IR: %a0 is a variable (potentially out-of-bounds) index for
; both the insert and the extracts, so the chain folds to undef/poison and
; codegen is expected to collapse to a bare return without crashing the
; compiler (the original bug was a crash during lowering).
  %1 = insertelement <4 x i32> zeroinitializer, i32 -2147483648, i32 %a0
  %2 = extractelement <4 x i32> %1, i32 %a0
  %3 = extractelement <4 x i32> <i32 30, i32 53, i32 42, i32 12>, i32 %2
  %4 = extractelement <4 x i32> zeroinitializer, i32 %2
  %5 = insertelement <4 x i32> undef, i32 %3, i32 undef
  store i32 %4, ptr undef
  ret <4 x i32> %5
}
782
783; If we do not define all bytes that are extracted, this is a miscompile.
784
define i32 @PR46586(ptr %p, <4 x i32> %v) {
; SSE2-LABEL: PR46586:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzbl 3(%rdi), %eax
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pinsrw $6, %eax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT:    movd %xmm0, %ecx
; SSE2-NEXT:    xorl %edx, %edx
; SSE2-NEXT:    divl %ecx
; SSE2-NEXT:    movl %edx, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: PR46586:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movzbl 3(%rdi), %eax
; SSE41-NEXT:    extractps $3, %xmm0, %ecx
; SSE41-NEXT:    xorl %edx, %edx
; SSE41-NEXT:    divl %ecx
; SSE41-NEXT:    movl %edx, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: PR46586:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl 3(%rdi), %eax
; AVX-NEXT:    vextractps $3, %xmm0, %ecx
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divl %ecx
; AVX-NEXT:    movl %edx, %eax
; AVX-NEXT:    retq
; Only lanes 0 and 3 of %t32 are defined (lanes 1 and 2 stay undef), and only
; lane 3 of the urem result is extracted. Codegen must still load and
; zero-extend byte p[3] (movzbl 3(%rdi)) so the extracted remainder is
; computed from a defined value — optimizing that byte away is a miscompile.
  %p3 = getelementptr inbounds i8, ptr %p, i64 3
  %t25 = load i8, ptr %p
  %t28 = load i8, ptr %p3
  %t29 = insertelement <4 x i8> undef, i8 %t25, i32 0
  %t32 = insertelement <4 x i8> %t29, i8 %t28, i32 3
  %t33 = zext <4 x i8> %t32 to <4 x i32>
  %t34 = urem <4 x i32> %t33, %v
  %t35 = extractelement <4 x i32> %t34, i32 3
  ret i32 %t35
}
827
define void @pr59781(ptr %in, ptr %out) {
; CHECK-LABEL: pr59781:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl (%rdi), %eax
; CHECK-NEXT:    movzbl 2(%rdi), %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    movq %rcx, (%rsi)
; CHECK-NEXT:    retq
; Load an i24 bitfield, zero-extend it to i64, and store it as a <1 x i64>.
; Expected codegen reassembles the 24 bits from a 16-bit plus an 8-bit load
; (no over-read past the third byte) and stores a single 64-bit value.
  %bf.load = load i24, ptr %in, align 8
  %conv = zext i24 %bf.load to i64
  %splat.splatinsert = insertelement <1 x i64> zeroinitializer, i64 %conv, i64 0
  store <1 x i64> %splat.splatinsert, ptr %out, align 8
  ret void
}
843