xref: /llvm-project/llvm/test/CodeGen/X86/insert-into-constant-vector.ll (revision be6c752e157638849f1f59f7e2b7ecbe11a022fe)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown   -mattr=+sse2     | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2     | FileCheck %s --check-prefixes=X64-SSE,X64-SSE2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown   -mattr=+sse4.1   | FileCheck %s --check-prefixes=X86-SSE,X86-SSE4
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1   | FileCheck %s --check-prefixes=X64-SSE,X64-SSE4
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown   -mattr=+avx      | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx      | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown   -mattr=+avx2     | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2     | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown   -mattr=+avx512f  | FileCheck %s --check-prefixes=X86-AVX,X86-AVX512F
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f  | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512F

; Insert i8 %x at element 0 of a v16i8 build-vector constant.
define <16 x i8> @elt0_v16i8(i8 %x) {
; X86-SSE2-LABEL: elt0_v16i8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt0_v16i8:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movzbl %dil, %eax
; X64-SSE2-NEXT:    movd %eax, %xmm0
; X64-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt0_v16i8:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X86-SSE4-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt0_v16i8:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X64-SSE4-NEXT:    pinsrb $0, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt0_v16i8:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X86-AVX-NEXT:    vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt0_v16i8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X64-AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <16 x i8> <i8 42, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i8 %x, i32 0
  ret <16 x i8> %ins
}

; Insert i16 %x at element 5 of a v8i16 build-vector constant.
define <8 x i16> @elt5_v8i16(i16 %x) {
; X86-SSE2-LABEL: elt5_v8i16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [42,1,2,3,4,u,6,7]
; X86-SSE2-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt5_v8i16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [42,1,2,3,4,u,6,7]
; X64-SSE2-NEXT:    pinsrw $5, %edi, %xmm0
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt5_v8i16:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    pmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X86-SSE4-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt5_v8i16:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X64-SSE4-NEXT:    pinsrw $5, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt5_v8i16:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X86-AVX-NEXT:    vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt5_v8i16:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X64-AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <8 x i16> <i16 42, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 %x, i32 5
  ret <8 x i16> %ins
}

; Insert i32 %x at element 3 of a v4i32 build-vector constant.
define <4 x i32> @elt3_v4i32(i32 %x) {
; X86-SSE2-LABEL: elt3_v4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,u]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt3_v4i32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movd %edi, %xmm1
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,u]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt3_v4i32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X86-SSE4-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt3_v4i32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X64-SSE4-NEXT:    pinsrd $3, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt3_v4i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X86-AVX-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt3_v4i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X64-AVX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
  ret <4 x i32> %ins
}

; Insert i64 %x at element 0 of a v2i64 build-vector constant.
define <2 x i64> @elt0_v2i64(i64 %x) {
; X86-SSE-LABEL: elt0_v2i64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT:    retl
;
; X64-SSE2-LABEL: elt0_v2i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movq %rdi, %xmm1
; X64-SSE2-NEXT:    movapd {{.*#+}} xmm0 = [u,1]
; X64-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE2-NEXT:    retq
;
; X64-SSE4-LABEL: elt0_v2i64:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbq {{.*#+}} xmm0 = [1,1]
; X64-SSE4-NEXT:    pinsrq $0, %rdi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt0_v2i64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt0_v2i64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [1,1]
; X64-AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <2 x i64> <i64 42, i64 1>, i64 %x, i32 0
  ret <2 x i64> %ins
}

; Insert float %x at element 1 of a v4f32 build-vector constant.
define <4 x float> @elt1_v4f32(float %x) {
; X86-SSE2-LABEL: elt1_v4f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt1_v4f32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt1_v4f32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-SSE4-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt1_v4f32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
; X64-SSE4-NEXT:    movaps %xmm1, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt1_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt1_v4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; X64-AVX-NEXT:    retq
  %ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
  ret <4 x float> %ins
}

; Insert double %x at element 1 of a v2f64 build-vector constant.
define <2 x double> @elt1_v2f64(double %x) {
; X86-SSE-LABEL: elt1_v2f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,u]
; X86-SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    retl
;
; X64-SSE-LABEL: elt1_v2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u]
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movaps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X86-AVX-LABEL: elt1_v2f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovddup {{.*#+}} xmm0 = [4.2E+1,4.2E+1]
; X86-AVX-NEXT:    # xmm0 = mem[0,0]
; X86-AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt1_v2f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; X64-AVX-NEXT:    # xmm1 = mem[0,0]
; X64-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX-NEXT:    retq
  %ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
  ret <2 x double> %ins
}

; Insert i32 %x at element 7 of a v8i32 build-vector constant (crosses the
; upper 128-bit lane on SSE, so the low half is just a constant load).
define <8 x i32> @elt7_v8i32(i32 %x) {
; X86-SSE2-LABEL: elt7_v8i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4,5,6,u]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt7_v8i32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movd %edi, %xmm0
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4,5,6,u]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt7_v8i32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4,5,6,0]
; X86-SSE4-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm1
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt7_v8i32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4,5,6,0]
; X64-SSE4-NEXT:    pinsrd $3, %edi, %xmm1
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt7_v8i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X86-AVX-NEXT:    retl
;
; X64-AVX1-LABEL: elt7_v8i32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: elt7_v8i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovd %edi, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX2-NEXT:    retq
;
; X64-AVX512F-LABEL: elt7_v8i32:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovd %edi, %xmm0
; X64-AVX512F-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7
  ret <8 x i32> %ins
}

; Insert float %x at element 6 of a v8f32 build-vector constant (upper lane
; on SSE; blend into a broadcast on AVX).
define <8 x float> @elt6_v8f32(float %x) {
; X86-SSE2-LABEL: elt6_v8f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt6_v8f32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt6_v8f32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X86-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt6_v8f32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X64-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt6_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X86-AVX-NEXT:    retl
;
; X64-AVX1-LABEL: elt6_v8f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: elt6_v8f32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX2-NEXT:    retq
;
; X64-AVX512F-LABEL: elt6_v8f32:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-AVX512F-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
  ret <8 x float> %ins
}

; Insert i64 %x at element 5 of a v8i64 build-vector constant (512-bit
; result: four xmm regs on SSE, two ymm on AVX/AVX2, one zmm on AVX512F).
define <8 x i64> @elt5_v8i64(i64 %x) {
; X86-SSE-LABEL: elt5_v8i64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = [4,0,0,0]
; X86-SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [42,0,1,0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,0,3,0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6,0,7,0]
; X86-SSE-NEXT:    retl
;
; X64-SSE2-LABEL: elt5_v8i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movq %rdi, %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4,u]
; X64-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm3 = [6,7]
; X64-SSE2-NEXT:    retq
;
; X64-SSE4-LABEL: elt5_v8i64:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbq {{.*#+}} xmm2 = [4,4]
; X64-SSE4-NEXT:    pinsrq $1, %rdi, %xmm2
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm3 = [6,7]
; X64-SSE4-NEXT:    retq
;
; X86-AVX1-LABEL: elt5_v8i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX1-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX1-NEXT:    retl
;
; X64-AVX1-LABEL: elt5_v8i64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} ymm0 = [4,u,6,7]
; X64-AVX1-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [42,1,2,3]
; X64-AVX1-NEXT:    retq
;
; X86-AVX2-LABEL: elt5_v8i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX2-NEXT:    vmovss {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX2-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: elt5_v8i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,0,6,7]
; X64-AVX2-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [42,1,2,3]
; X64-AVX2-NEXT:    retq
;
; X86-AVX512F-LABEL: elt5_v8i64:
; X86-AVX512F:       # %bb.0:
; X86-AVX512F-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [42,1,2,3]
; X86-AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX512F-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [4,0]
; X86-AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X86-AVX512F-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X86-AVX512F-NEXT:    retl
;
; X64-AVX512F-LABEL: elt5_v8i64:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovq %rdi, %xmm1
; X64-AVX512F-NEXT:    vpmovsxbq {{.*#+}} zmm0 = [8,9,10,11,12,0,14,15]
; X64-AVX512F-NEXT:    vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
  ret <8 x i64> %ins
}

; Insert double %x at element 1 of a v8f64 build-vector constant.
define <8 x double> @elt1_v8f64(double %x) {
; X86-SSE-LABEL: elt1_v8f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,u]
; X86-SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X86-SSE-NEXT:    retl
;
; X64-SSE-LABEL: elt1_v8f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [4.2E+1,u]
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X64-SSE-NEXT:    movaps %xmm4, %xmm0
; X64-SSE-NEXT:    retq
;
; X86-AVX1-LABEL: elt1_v8f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-AVX1-NEXT:    vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
; X86-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX1-NEXT:    retl
;
; X64-AVX1-LABEL: elt1_v8f64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX1-NEXT:    retq
;
; X86-AVX2-LABEL: elt1_v8f64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-AVX2-NEXT:    vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: elt1_v8f64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX2-NEXT:    retq
;
; X86-AVX512F-LABEL: elt1_v8f64:
; X86-AVX512F:       # %bb.0:
; X86-AVX512F-NEXT:    vmovaps {{.*#+}} zmm0 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX512F-NEXT:    vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
; X86-AVX512F-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; X86-AVX512F-NEXT:    retl
;
; X64-AVX512F-LABEL: elt1_v8f64:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX512F-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1
  ret <8 x double> %ins
}
