; xref: /llvm-project/llvm/test/CodeGen/X86/insertelement-var-index.ll (revision 170c525d79a4ab3659041b0655ac9697768fc915)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2   | FileCheck %s --check-prefixes=ALL,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx    | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2   | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl  | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=i686-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,X86AVX2

; Insert at an undef index: the whole insert folds away, leaving only a return.
define <16 x i8> @undef_index(i8 %x) nounwind {
; ALL-LABEL: undef_index:
; ALL:       # %bb.0:
; ALL-NEXT:    ret{{[l|q]}}
  %ins = insertelement <16 x i8> undef, i8 %x, i64 undef
  ret <16 x i8> %ins
}
17
; Insert of an undef scalar: the vector is returned unchanged (bare return).
define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind {
; ALL-LABEL: undef_scalar:
; ALL:       # %bb.0:
; ALL-NEXT:    ret{{[l|q]}}
  %ins = insertelement <16 x i8> %x, i8 undef, i32 %index
  ret <16 x i8> %ins
}
25
;
; Insertion into undef vectors
;

; Variable-index insert into an undef v16i8: all other lanes are undef, so
; codegen splats %x (shuffles on SSE, pshufb on SSE41/AVX1, vpbroadcastb on AVX2+).
define <16 x i8> @arg_i8_v16i8_undef(i8 %x, i32 %y) nounwind {
; SSE2-LABEL: arg_i8_v16i8_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_i8_v16i8_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pshufb %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: arg_i8_v16i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i8_v16i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i8_v16i8_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i8_v16i8_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %edi, %xmm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i8_v16i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}
77
; Variable-index insert into an undef v8i16: lowered as a word splat
; (pshuflw+pshufd on SSE/AVX1, vpbroadcastw on AVX2+).
define <8 x i16> @arg_i16_v8i16_undef(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v8i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i16_v8i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i16_v8i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i16_v8i16_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i16_v8i16_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %edi, %xmm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i16_v8i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}
117
; Variable-index insert into an undef v4i32: lowered as a dword splat.
define <4 x i32> @arg_i32_v4i32_undef(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v4i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i32_v4i32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i32_v4i32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i32_v4i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i32_v4i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}
149
; Variable-index insert into an undef v2i64: lowered as a qword splat
; (32-bit target reloads the i64 arg from the stack via vmovddup).
define <2 x i64> @arg_i64_v2i64_undef(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v2i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i64_v2i64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i64_v2i64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i64_v2i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i64_v2i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}
181
; Variable-index insert into an undef v4f32: %x already arrives in xmm0, so
; codegen is just a splat of lane 0.
define <4 x float> @arg_f32_v4f32_undef(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v4f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v4f32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v4f32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v4f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v4f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}
210
; Variable-index insert into an undef v2f64: splat of xmm0 lane 0
; (movlhps pre-SSE4.1, movddup after).
define <2 x double> @arg_f64_v2f64_undef(double %x, i32 %y) nounwind {
; SSE2-LABEL: arg_f64_v2f64_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_f64_v2f64_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: arg_f64_v2f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: arg_f64_v2f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}
234
; Loaded-scalar variant: AVX2+ folds the load into vpbroadcastb from memory.
define <16 x i8> @load_i8_v16i8_undef(ptr %p, i32 %y) nounwind {
; SSE2-LABEL: load_i8_v16i8_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzbl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: load_i8_v16i8_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movzbl (%rdi), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pshufb %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_i8_v16i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i8_v16i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i8_v16i8_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb (%rdi), %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i8_v16i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastb (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load i8, ptr %p
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}
280
; Loaded i16 into undef v8i16: word splat; AVX2+ uses a memory-operand broadcast.
define <8 x i16> @load_i16_v8i16_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v8i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i16_v8i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i16_v8i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i16_v8i16_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i16_v8i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastw (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load i16, ptr %p
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}
317
; Loaded i32 into undef v4i32: dword splat; all AVX levels share vbroadcastss.
define <4 x i32> @load_i32_v4i32_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v4i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v4i32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i32_v4i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load i32, ptr %p
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}
339
; Loaded i64 into undef v2i64: qword splat; AVX folds load into vmovddup.
define <2 x i64> @load_i64_v2i64_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v2i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i64_v2i64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i64_v2i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %x = load i64, ptr %p
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}
361
; Loaded float into undef v4f32: splat; AVX uses memory-operand vbroadcastss.
define <4 x float> @load_f32_v4f32_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v4f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v4f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f32_v4f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load float, ptr %p
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}
383
; Loaded double into undef v2f64: splat; SSE4.1+/AVX use movddup from memory.
define <2 x double> @load_f64_v2f64_undef(ptr %p, i32 %y) nounwind {
; SSE2-LABEL: load_f64_v2f64_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: load_f64_v2f64_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_f64_v2f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f64_v2f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %x = load double, ptr %p
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}
410
; 256-bit undef insert: SSE has no 256-bit vectors, so it goes through a
; stack slot at the variable index; AVX levels splat into a ymm register.
define <32 x i8> @arg_i8_v32i8_undef(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v32i8_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %dil, -40(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i8_v32i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i8_v32i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i8_v32i8_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i8_v32i8_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %edi, %ymm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i8_v32i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}
453
; v16i16 undef insert: SSE spills via stack slot; AVX splats the word to ymm.
define <16 x i16> @arg_i16_v16i16_undef(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v16i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %di, -40(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i16_v16i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i16_v16i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i16_v16i16_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i16_v16i16_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %edi, %ymm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i16_v16i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}
496
; v8i32 undef insert: SSE uses a stack slot; AVX splats the dword to ymm.
define <8 x i32> @arg_i32_v8i32_undef(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v8i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %edi, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i32_v8i32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i32_v8i32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i32_v8i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i32_v8i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}
532
; v4i64 undef insert: SSE uses a stack slot; AVX splats the qword to ymm.
define <4 x i64> @arg_i64_v4i64_undef(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v4i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rdi, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i64_v4i64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i64_v4i64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i64_v4i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i64_v4i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}
568
; v8f32 undef insert: SSE stores the float to a stack slot; AVX broadcasts xmm0.
define <8 x float> @arg_f32_v8f32_undef(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v8f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movss %xmm0, -40(%rsp,%rdi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v8f32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v8f32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v8f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v8f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}
602
; v4f64 undef insert: SSE stores the double to a stack slot; AVX broadcasts xmm0.
define <4 x double> @arg_f64_v4f64_undef(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v4f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $3, %edi
; SSE-NEXT:    movsd %xmm0, -40(%rsp,%rdi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f64_v4f64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f64_v4f64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f64_v4f64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f64_v4f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}
636
; Loaded i8 into undef v32i8: SSE goes through a stack slot; AVX2+ folds the
; load into a ymm vpbroadcastb.
define <32 x i8> @load_i8_v32i8_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v32i8_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %al, -40(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i8_v32i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i8_v32i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i8_v32i8_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i8_v32i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastb (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i8, ptr %p
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}
676
; Loaded i16 into undef v16i16: stack slot on SSE; ymm vpbroadcastw on AVX2+.
define <16 x i16> @load_i16_v16i16_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v16i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %ax, -40(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i16_v16i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i16_v16i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i16_v16i16_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i16_v16i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastw (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i16, ptr %p
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}
716
; Loaded i32 into undef v8i32: stack slot on SSE; ymm vbroadcastss on all AVX.
define <8 x i32> @load_i32_v8i32_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v8i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %eax, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v8i32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i32_v8i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i32, ptr %p
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}
742
; Loaded i64 into undef v4i64: stack slot on SSE; ymm vbroadcastsd on all AVX.
define <4 x i64> @load_i64_v4i64_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v4i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rax, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i64_v4i64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i64_v4i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastsd (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i64, ptr %p
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}
768
; Loaded float into undef v8f32: stack slot on SSE; ymm vbroadcastss on AVX.
define <8 x float> @load_f32_v8f32_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v8f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movss %xmm0, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v8f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f32_v8f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load float, ptr %p
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}
794
; Loaded double into undef v4f64: stack slot on SSE; ymm vbroadcastsd on AVX.
define <4 x double> @load_f64_v4f64_undef(ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v4f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movsd %xmm0, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f64_v4f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f64_v4f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastsd (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load double, ptr %p
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}
820
;
; Insertion into arg vectors
;

; Variable-index insert into a live vector: most targets spill the vector to a
; 16-byte-aligned stack slot, store the byte at the masked index, and reload.
; AVX512BW instead builds a one-lane mask (compare splatted index against the
; constant <0..15>) and does a masked broadcast, avoiding the stack round-trip.
define <16 x i8> @arg_i8_v16i8(<16 x i8> %v, i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movb %dil, -24(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i8_v16i8:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $15, %esi
; AVX1OR2-NEXT:    movb %dil, -24(%rsp,%rsi)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: arg_i8_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    andl $15, %esi
; AVX512F-NEXT:    movb %dil, -24(%rsp,%rsi)
; AVX512F-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i8_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %esi, %xmm1
; AVX512BW-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512BW-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i8_v16i8:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $15, %eax
; X86AVX2-NEXT:    movzbl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <16 x i8> %v, i8 %x, i32 %y
  ret <16 x i8> %ins
}
878
; Same pattern as arg_i8_v16i8 but for words: stack spill + indexed store on
; most targets, masked vpbroadcastw on AVX512BW.
define <8 x i16> @arg_i16_v8i16(<8 x i16> %v, i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movw %di, -24(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i16_v8i16:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $7, %esi
; AVX1OR2-NEXT:    movw %di, -24(%rsp,%rsi,2)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: arg_i16_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    andl $7, %esi
; AVX512F-NEXT:    movw %di, -24(%rsp,%rsi,2)
; AVX512F-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i16_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %esi, %xmm1
; AVX512BW-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512BW-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i16_v8i16:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movzwl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movw %cx, (%esp,%eax,2)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x i16> %v, i16 %x, i32 %y
  ret <8 x i16> %ins
}
932
; Dword variant: stack spill + indexed store on SSE/AVX1OR2; all AVX512 levels
; (vl suffices for dword masking) use the masked vpbroadcastd form.
define <4 x i32> @arg_i32_v4i32(<4 x i32> %v, i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movl %edi, -24(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i32_v4i32:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $3, %esi
; AVX1OR2-NEXT:    movl %edi, -24(%rsp,%rsi,4)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: arg_i32_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %xmm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i32_v4i32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movl %ecx, (%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x i32> %v, i32 %x, i32 %y
  ret <4 x i32> %ins
}
977
; Variable-index insert of an i64: 64-bit targets store the whole quadword to a
; stack spill slot; the 32-bit X86AVX2 target has no 64-bit GPR store, so it
; inserts the two 32-bit halves one at a time (doubled index, then index+1)
; through two stack round-trips.
define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $1, %esi
; SSE-NEXT:    movq %rdi, -24(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i64_v2i64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $1, %esi
; AVX1OR2-NEXT:    movq %rdi, -24(%rsp,%rsi,8)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: arg_i64_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %xmm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i64_v2i64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    pushl %esi
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $48, %esp
; X86AVX2-NEXT:    movl 8(%ebp), %edx
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    movl 16(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    addl %ecx, %ecx
; X86AVX2-NEXT:    movl %ecx, %esi
; X86AVX2-NEXT:    andl $3, %esi
; X86AVX2-NEXT:    movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
; X86AVX2-NEXT:    incl %ecx
; X86AVX2-NEXT:    andl $3, %ecx
; X86AVX2-NEXT:    movl %eax, 16(%esp,%ecx,4)
; X86AVX2-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    leal -4(%ebp), %esp
; X86AVX2-NEXT:    popl %esi
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <2 x i64> %v, i64 %x, i32 %y
  ret <2 x i64> %ins
}
1033
; Variable-index insert of a float already in an XMM register: SSE2 spills to
; the stack; SSE4.1/AVX1/AVX2 splat the scalar and select it in with a
; compare-generated mask and blendvps; AVX512 uses a mask register with a
; masked vbroadcastss.
define <4 x float> @arg_f32_v4f32(<4 x float> %v, float %x, i32 %y) nounwind {
; SSE2-LABEL: arg_f32_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    andl $3, %edi
; SSE2-NEXT:    movss %xmm1, -24(%rsp,%rdi,4)
; SSE2-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_f32_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm2
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vmovd %edi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm1, %xmm1
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %xmm2
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v4f32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %xmm1
; X86AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm2
; X86AVX2-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x float> %v, float %x, i32 %y
  ret <4 x float> %ins
}
1090
; Variable-index insert of a double: SSE2 and 32-bit X86AVX2 go through a stack
; spill slot; SSE4.1/AVX blend via a pcmpeqq-generated mask; AVX512 uses a
; masked vmovddup splat of the incoming scalar.
define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind {
; SSE2-LABEL: arg_f64_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    andl $1, %edi
; SSE2-NEXT:    movsd %xmm1, -24(%rsp,%rdi,8)
; SSE2-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_f64_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movddup {{.*#+}} xmm1 = xmm1[0,0]
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: arg_f64_v2f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vmovd %edi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f64_v2f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f64_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %edi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %xmm2
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f64_v2f64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 16(%ebp), %eax
; X86AVX2-NEXT:    andl $1, %eax
; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    vmovsd %xmm1, (%esp,%eax,8)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <2 x double> %v, double %x, i32 %y
  ret <2 x double> %ins
}
1157
; Same variable-index insert, but the scalar comes from memory: most targets
; still load the byte into a GPR and store it into a stack spill of the vector;
; only AVX512BW can fold the load into a masked vpbroadcastb.
define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movb %al, -24(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i8_v16i8:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movzbl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $15, %esi
; AVX1OR2-NEXT:    movb %al, -24(%rsp,%rsi)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: load_i8_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    movzbl (%rdi), %eax
; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    andl $15, %esi
; AVX512F-NEXT:    movb %al, -24(%rsp,%rsi)
; AVX512F-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: load_i8_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %esi, %xmm1
; AVX512BW-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512BW-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: load_i8_v16i8:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $15, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movzbl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i8, ptr %p
  %ins = insertelement <16 x i8> %v, i8 %x, i32 %y
  ret <16 x i8> %ins
}
1216
; Variable-index insert of an i16 loaded from memory; AVX512BW folds the load
; into a masked vpbroadcastw, all other targets use the stack spill-slot
; technique.
define <8 x i16> @load_i16_v8i16(<8 x i16> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movw %ax, -24(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i16_v8i16:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movzwl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $7, %esi
; AVX1OR2-NEXT:    movw %ax, -24(%rsp,%rsi,2)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: load_i16_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    andl $7, %esi
; AVX512F-NEXT:    movw %ax, -24(%rsp,%rsi,2)
; AVX512F-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: load_i16_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %esi, %xmm1
; AVX512BW-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512BW-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: load_i16_v8i16:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movzwl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movw %cx, (%esp,%eax,2)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i16, ptr %p
  %ins = insertelement <8 x i16> %v, i16 %x, i32 %y
  ret <8 x i16> %ins
}
1275
; Variable-index insert of an i32 loaded from memory; all AVX512VL configs
; (AVX512 prefix) can fold the load into a masked vpbroadcastd, others spill
; to the stack.
define <4 x i32> @load_i32_v4i32(<4 x i32> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movl %eax, -24(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i32_v4i32:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $3, %esi
; AVX1OR2-NEXT:    movl %eax, -24(%rsp,%rsi,4)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: load_i32_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %xmm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i32_v4i32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movl %ecx, (%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i32, ptr %p
  %ins = insertelement <4 x i32> %v, i32 %x, i32 %y
  ret <4 x i32> %ins
}
1324
; Variable-index insert of an i64 loaded from memory; on the 32-bit target the
; two dword halves of the load are inserted separately via two stack
; round-trips (index doubled, then index+1).
define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $1, %esi
; SSE-NEXT:    movq %rax, -24(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i64_v2i64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movq (%rdi), %rax
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $1, %esi
; AVX1OR2-NEXT:    movq %rax, -24(%rsp,%rsi,8)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: load_i64_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %xmm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i64_v2i64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    pushl %esi
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $48, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movl (%ecx), %edx
; X86AVX2-NEXT:    movl 4(%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    addl %eax, %eax
; X86AVX2-NEXT:    movl %eax, %esi
; X86AVX2-NEXT:    andl $3, %esi
; X86AVX2-NEXT:    movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
; X86AVX2-NEXT:    incl %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    movl %ecx, 16(%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    leal -4(%ebp), %esp
; X86AVX2-NEXT:    popl %esi
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i64, ptr %p
  %ins = insertelement <2 x i64> %v, i64 %x, i32 %y
  ret <2 x i64> %ins
}
1384
; Variable-index insert of a float loaded from memory; AVX targets fold the
; load into a broadcast and blend (or masked broadcast on AVX512), SSE2 uses
; the stack spill slot.
define <4 x float> @load_f32_v4f32(<4 x float> %v, ptr %p, i32 %y) nounwind {
; SSE2-LABEL: load_f32_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    andl $3, %esi
; SSE2-NEXT:    movss %xmm1, -24(%rsp,%rsi,4)
; SSE2-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: load_f32_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE41-NEXT:    movd %esi, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_f32_v4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastss (%rdi), %xmm1
; AVX1-NEXT:    vmovd %esi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_f32_v4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm1
; AVX2-NEXT:    vmovd %esi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_f32_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %xmm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_f32_v4f32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %xmm1
; X86AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86AVX2-NEXT:    vbroadcastss (%eax), %xmm2
; X86AVX2-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; X86AVX2-NEXT:    retl
  %x = load float, ptr %p
  %ins = insertelement <4 x float> %v, float %x, i32 %y
  ret <4 x float> %ins
}
1445
; Variable-index insert of a double loaded from memory; SSE4.1/AVX splat the
; load with movddup and blend via a pcmpeqq mask, AVX512 uses a masked
; vmovddup from memory, SSE2 and 32-bit X86AVX2 spill to the stack.
define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
; SSE2-LABEL: load_f64_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    andl $1, %esi
; SSE2-NEXT:    movsd %xmm1, -24(%rsp,%rsi,8)
; SSE2-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: load_f64_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movapd %xmm0, %xmm1
; SSE41-NEXT:    movddup {{.*#+}} xmm2 = mem[0,0]
; SSE41-NEXT:    movd %esi, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_f64_v2f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vmovd %esi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_f64_v2f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX2-NEXT:    movl %esi, %eax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_f64_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %xmm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_f64_v2f64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $1, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    vmovsd %xmm1, (%esp,%eax,8)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load double, ptr %p
  %ins = insertelement <2 x double> %v, double %x, i32 %y
  ret <2 x double> %ins
}
1515
; 256-bit variant: the spill-slot approach now needs a 32-byte-aligned frame
; (rbp-based realignment) on AVX1/AVX2/AVX512F; SSE splits the vector into two
; adjacent 16-byte spill slots; only AVX512BW stays branch/spill-free with a
; masked ymm broadcast.
define <32 x i8> @arg_i8_v32i8(<32 x i8> %v, i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %dil, -40(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i8_v32i8:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $31, %esi
; AVX1OR2-NEXT:    movb %dil, (%rsp,%rsi)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: arg_i8_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    vmovaps %ymm0, (%rsp)
; AVX512F-NEXT:    andl $31, %esi
; AVX512F-NEXT:    movb %dil, (%rsp,%rsi)
; AVX512F-NEXT:    vmovaps (%rsp), %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i8_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %esi, %ymm1
; AVX512BW-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512BW-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i8_v32i8:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $31, %eax
; X86AVX2-NEXT:    movzbl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <32 x i8> %v, i8 %x, i32 %y
  ret <32 x i8> %ins
}
1583
; 256-bit i16 insert at a variable index: stack spill with a 32-byte-aligned
; frame everywhere except AVX512BW, which uses a masked vpbroadcastw on ymm.
define <16 x i16> @arg_i16_v16i16(<16 x i16> %v, i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %di, -40(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i16_v16i16:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $15, %esi
; AVX1OR2-NEXT:    movw %di, (%rsp,%rsi,2)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: arg_i16_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    vmovaps %ymm0, (%rsp)
; AVX512F-NEXT:    andl $15, %esi
; AVX512F-NEXT:    movw %di, (%rsp,%rsi,2)
; AVX512F-NEXT:    vmovaps (%rsp), %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i16_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %esi, %ymm1
; AVX512BW-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512BW-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i16_v16i16:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $15, %eax
; X86AVX2-NEXT:    movzwl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movw %cx, (%esp,%eax,2)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <16 x i16> %v, i16 %x, i32 %y
  ret <16 x i16> %ins
}
1651
; 256-bit i32 insert at a variable index: AVX512F handles dword-granular masked
; broadcast (AVX512 prefix covers both AVX512F and AVX512BW runs); AVX1/AVX2
; and the 32-bit target use an aligned stack spill.
define <8 x i32> @arg_i32_v8i32(<8 x i32> %v, i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %edi, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i32_v8i32:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $7, %esi
; AVX1OR2-NEXT:    movl %edi, (%rsp,%rsi,4)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: arg_i32_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %ymm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i32_v8i32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movl %ecx, (%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x i32> %v, i32 %x, i32 %y
  ret <8 x i32> %ins
}
1704
; 256-bit i64 insert at a variable index: AVX512 does a masked vpbroadcastq;
; the 32-bit target again splits the i64 into two dword inserts through two
; 32-byte-aligned stack round-trips.
define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rdi, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i64_v4i64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $3, %esi
; AVX1OR2-NEXT:    movq %rdi, (%rsp,%rsi,8)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: arg_i64_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i64_v4i64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    pushl %esi
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $96, %esp
; X86AVX2-NEXT:    movl 8(%ebp), %edx
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    movl 16(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    addl %ecx, %ecx
; X86AVX2-NEXT:    movl %ecx, %esi
; X86AVX2-NEXT:    andl $7, %esi
; X86AVX2-NEXT:    movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X86AVX2-NEXT:    incl %ecx
; X86AVX2-NEXT:    andl $7, %ecx
; X86AVX2-NEXT:    movl %eax, 32(%esp,%ecx,4)
; X86AVX2-NEXT:    vmovaps {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    leal -4(%ebp), %esp
; X86AVX2-NEXT:    popl %esi
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x i64> %v, i64 %x, i32 %y
  ret <4 x i64> %ins
}
1768
; Insert a scalar float argument into <8 x float> at a variable index.
; SSE has no variable-lane insert: both vector halves are spilled to the
; stack and the scalar is stored at the masked (in-range) index. AVX1/AVX2
; broadcast the scalar and the index, compare the index splat against a
; constant-pool vector, and blend. AVX512 compares into a mask register
; and performs a masked broadcast directly into the destination.
define <8 x float> @arg_f32_v8f32(<8 x float> %v, float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movss %xmm2, -40(%rsp,%rdi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vmovd %edi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %ymm2
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
; AVX512-NEXT:    vbroadcastss %xmm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v8f32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm1
; X86AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
; X86AVX2-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x float> %v, float %x, i32 %y
  ret <8 x float> %ins
}
1819
; Insert a scalar double argument into <4 x double> at a variable index.
; SSE and 32-bit AVX2 spill to the stack (the latter via an aligned frame)
; and store the scalar at the masked index; 64-bit AVX1/AVX2 broadcast and
; blend via a vpcmpeqq against a constant pool; AVX512 uses a masked
; vbroadcastsd.
define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %edi
; SSE-NEXT:    movsd %xmm2, -40(%rsp,%rdi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f64_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f64_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f64_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %edi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %ymm2
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
; AVX512-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f64_v4f64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 16(%ebp), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    vmovsd %xmm1, (%esp,%eax,8)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x double> %v, double %x, i32 %y
  ret <4 x double> %ins
}
1881
; Insert a loaded i8 into <32 x i8> at a variable index.
; All targets without AVX512BW (SSE, AVX1/AVX2, AVX512F, 32-bit AVX2) go
; through the stack: spill the vector, store the byte at the masked index,
; reload. Only AVX512BW has byte-granularity masking and can use a masked
; vpbroadcastb from memory instead.
define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %al, -40(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i8_v32i8:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movzbl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $31, %esi
; AVX1OR2-NEXT:    movb %al, (%rsp,%rsi)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: load_i8_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    movzbl (%rdi), %eax
; AVX512F-NEXT:    vmovaps %ymm0, (%rsp)
; AVX512F-NEXT:    andl $31, %esi
; AVX512F-NEXT:    movb %al, (%rsp,%rsi)
; AVX512F-NEXT:    vmovaps (%rsp), %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: load_i8_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %esi, %ymm1
; AVX512BW-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512BW-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: load_i8_v32i8:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $31, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movzbl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i8, ptr %p
  %ins = insertelement <32 x i8> %v, i8 %x, i32 %y
  ret <32 x i8> %ins
}
1954
; Insert a loaded i16 into <16 x i16> at a variable index.
; As with the i8 case, word-granularity masking needs AVX512BW: only that
; target uses a masked vpbroadcastw; everything else (including AVX512F)
; spills the vector to the stack, stores the word at the masked index,
; and reloads.
define <16 x i16> @load_i16_v16i16(<16 x i16> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %ax, -40(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i16_v16i16:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movzwl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $15, %esi
; AVX1OR2-NEXT:    movw %ax, (%rsp,%rsi,2)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: load_i16_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovaps %ymm0, (%rsp)
; AVX512F-NEXT:    andl $15, %esi
; AVX512F-NEXT:    movw %ax, (%rsp,%rsi,2)
; AVX512F-NEXT:    vmovaps (%rsp), %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: load_i16_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %esi, %ymm1
; AVX512BW-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512BW-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: load_i16_v16i16:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $15, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movzwl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movw %cx, (%esp,%eax,2)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i16, ptr %p
  %ins = insertelement <16 x i16> %v, i16 %x, i32 %y
  ret <16 x i16> %ins
}
2027
; Insert a loaded i32 into <8 x i32> at a variable index.
; SSE, AVX1/AVX2 and 32-bit AVX2 spill the vector and store through the
; masked index; AVX512 (F is sufficient for dword masking) compares the
; broadcast index into %k1 and does a masked vpbroadcastd from memory.
define <8 x i32> @load_i32_v8i32(<8 x i32> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %eax, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i32_v8i32:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $7, %esi
; AVX1OR2-NEXT:    movl %eax, (%rsp,%rsi,4)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: load_i32_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %ymm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i32_v8i32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movl %ecx, (%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i32, ptr %p
  %ins = insertelement <8 x i32> %v, i32 %x, i32 %y
  ret <8 x i32> %ins
}
2084
; Insert a loaded i64 into <4 x i64> at a variable index.
; 64-bit targets spill and store the whole qword (SSE/AVX1/AVX2) or use an
; AVX512 masked vpbroadcastq. On 32-bit x86 (X86AVX2) the i64 element is
; handled as two i32 halves: the index is doubled, each half is stored at
; its own masked dword slot, with a spill/reload round-trip in between.
define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rax, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i64_v4i64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movq (%rdi), %rax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $3, %esi
; AVX1OR2-NEXT:    movq %rax, (%rsp,%rsi,8)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: load_i64_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i64_v4i64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    pushl %esi
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $96, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movl (%ecx), %edx
; X86AVX2-NEXT:    movl 4(%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    addl %eax, %eax
; X86AVX2-NEXT:    movl %eax, %esi
; X86AVX2-NEXT:    andl $7, %esi
; X86AVX2-NEXT:    movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X86AVX2-NEXT:    incl %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movl %ecx, 32(%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    leal -4(%ebp), %esp
; X86AVX2-NEXT:    popl %esi
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i64, ptr %p
  %ins = insertelement <4 x i64> %v, i64 %x, i32 %y
  ret <4 x i64> %ins
}
2152
; Insert a loaded float into <8 x float> at a variable index.
; With the scalar already in memory, AVX1/AVX2/X86AVX2 can vbroadcastss it
; straight from the load and blend against a compare of the broadcast
; index; SSE falls back to the stack spill + indexed store; AVX512 uses a
; masked vbroadcastss from memory.
define <8 x float> @load_f32_v8f32(<8 x float> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movss %xmm2, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_f32_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %esi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vbroadcastss (%rdi), %ymm2
; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_f32_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm1
; AVX2-NEXT:    vmovd %esi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_f32_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %ymm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_f32_v8f32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm1
; X86AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86AVX2-NEXT:    vbroadcastss (%eax), %ymm2
; X86AVX2-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; X86AVX2-NEXT:    retl
  %x = load float, ptr %p
  %ins = insertelement <8 x float> %v, float %x, i32 %y
  ret <8 x float> %ins
}
2205
; Insert a loaded double into <4 x double> at a variable index.
; 64-bit AVX1/AVX2 vbroadcastsd from memory and blend on a vpcmpeqq of the
; zero-extended index; AVX512 uses a masked vbroadcastsd; SSE and 32-bit
; AVX2 take the spill-to-stack + indexed-store route.
define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movsd %xmm2, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_f64_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %esi, %eax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm2
; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_f64_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm1
; AVX2-NEXT:    movl %esi, %eax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_f64_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_f64_v4f64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    vmovsd %xmm1, (%esp,%eax,8)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load double, ptr %p
  %ins = insertelement <4 x double> %v, double %x, i32 %y
  ret <4 x double> %ins
}
2269
; Don't crash when trying to insert at an invalid index (PR44139).
2271
; PR44139 regression test: the insertelement index operand here is an i1
; (the result of an always-true icmp), i.e. not a normal i32/i64 index.
; The point of the test is that codegen handles this without crashing;
; the surrounding srem/udiv arithmetic keeps the inserted value live.
define i32 @PR44139(ptr %p) {
; SSE-LABEL: PR44139:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, 96(%rdi)
; SSE-NEXT:    movdqa %xmm0, 112(%rdi)
; SSE-NEXT:    movdqa %xmm0, 64(%rdi)
; SSE-NEXT:    movdqa %xmm0, 80(%rdi)
; SSE-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE-NEXT:    movdqa %xmm0, (%rdi)
; SSE-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE-NEXT:    leal 2147483647(%rax), %ecx
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    cmovnsl %eax, %ecx
; SSE-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divl %ecx
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR44139:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movq (%rdi), %rax
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vmovaps %ymm0, 64(%rdi)
; AVX1-NEXT:    vmovaps %ymm0, 96(%rdi)
; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
; AVX1-NEXT:    leal 2147483647(%rax), %ecx
; AVX1-NEXT:    testl %eax, %eax
; AVX1-NEXT:    cmovnsl %eax, %ecx
; AVX1-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; AVX1-NEXT:    addl %eax, %ecx
; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX1-NEXT:    xorl %edx, %edx
; AVX1-NEXT:    divl %ecx
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR44139:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq (%rdi), %rax
; AVX2-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX2-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vmovdqa %ymm0, 64(%rdi)
; AVX2-NEXT:    vmovdqa %ymm0, 96(%rdi)
; AVX2-NEXT:    vmovdqa %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
; AVX2-NEXT:    leal 2147483647(%rax), %ecx
; AVX2-NEXT:    testl %eax, %eax
; AVX2-NEXT:    cmovnsl %eax, %ecx
; AVX2-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; AVX2-NEXT:    addl %eax, %ecx
; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX2-NEXT:    xorl %edx, %edx
; AVX2-NEXT:    divl %ecx
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR44139:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq (%rdi), %rax
; AVX512-NEXT:    vpbroadcastq (%rdi), %zmm0
; AVX512-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; AVX512-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm1
; AVX512-NEXT:    vmovdqa64 %zmm0, 64(%rdi)
; AVX512-NEXT:    vmovdqa64 %zmm1, (%rdi)
; AVX512-NEXT:    leal 2147483647(%rax), %ecx
; AVX512-NEXT:    testl %eax, %eax
; AVX512-NEXT:    cmovnsl %eax, %ecx
; AVX512-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; AVX512-NEXT:    addl %eax, %ecx
; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX512-NEXT:    xorl %edx, %edx
; AVX512-NEXT:    divl %ecx
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: PR44139:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86AVX2-NEXT:    vbroadcastsd (%ecx), %ymm0
; X86AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0]
; X86AVX2-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X86AVX2-NEXT:    vmovaps %ymm0, 64(%ecx)
; X86AVX2-NEXT:    vmovaps %ymm0, 96(%ecx)
; X86AVX2-NEXT:    vmovaps %ymm0, 32(%ecx)
; X86AVX2-NEXT:    movl (%ecx), %eax
; X86AVX2-NEXT:    vmovaps %ymm1, (%ecx)
; X86AVX2-NEXT:    leal 2147483647(%eax), %ecx
; X86AVX2-NEXT:    testl %eax, %eax
; X86AVX2-NEXT:    cmovnsl %eax, %ecx
; X86AVX2-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; X86AVX2-NEXT:    addl %eax, %ecx
; X86AVX2-NEXT:    xorl %edx, %edx
; X86AVX2-NEXT:    divl %ecx
; X86AVX2-NEXT:    vzeroupper
; X86AVX2-NEXT:    retl
  %L = load <16 x i64>, ptr %p
  %E1 = extractelement <16 x i64> %L, i64 0
  %tempvector = insertelement <16 x i64> undef, i64 %E1, i32 0
  %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
  %C3 = icmp sgt i64 9223372036854775807, -9223372036854775808
  %t0 = trunc <16 x i64> %vector to <16 x i32>
  %I4 = insertelement <16 x i64> %vector, i64 %E1, i1 %C3
  store <16 x i64> %I4, ptr %p
  %elt = extractelement <16 x i32> %t0, i32 0
  %B = srem i32 %elt, -2147483648
  %B9 = udiv i32 %elt, %B
  ret i32 %B9
}
2389