; Source: llvm/test/CodeGen/X86/insertelement-ones.ll (revision 8b43c1be23119c1024bed0a8ce392bc73727e2e2)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

; Insert i64 -1 (all-ones) into element 0 of a <2 x i64>.
; Per the CHECK lines: plain-SSE targets blend the constant in from memory
; (movlps); SSE4.1/AVX materialize all-ones with pcmpeqd and blend.
define <2 x i64> @insert_v2i64_x1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_x1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_x1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_x1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_x1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_x1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_x1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v2i64_x1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 -1, i32 0
  ret <2 x i64> %1
}

; Insert i64 -1 into element 2 of a <4 x i64>.
; Per the CHECK lines: pre-AVX the 256-bit vector lives in xmm0/xmm1, so only
; the high half (xmm1) is touched; AVX1 builds all-ones with vcmptrueps, AVX2+
; with vpcmpeqd, then blends.
define <4 x i64> @insert_v4i64_01x3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i64_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 -1, i32 2
  ret <4 x i64> %1
}

; Insert i32 -1 into element 2 of a <4 x i32>.
; Per the CHECK lines: pre-SSE4.1 targets build -1 in a GPR and shuffle it in;
; SSE4.1/AVX use pcmpeqd + blend.
define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i32_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 -1, i32 2
  ret <4 x i32> %1
}

; Insert i32 -1 into elements 0 and 6 of an <8 x i32> (two inserts folded into
; one blend on AVX targets; split across xmm0/xmm1 on pre-AVX targets).
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_x12345x7:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_x12345x7:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v8i32_x12345x7:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX512-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 -1, i32 0
  %2 = insertelement <8 x i32> %1, i32 -1, i32 6
  ret <8 x i32> %2
}

; Insert i16 -1 into elements 0 and 6 of an <8 x i16>.
; Per the CHECK lines: pre-SSE4.1 uses two pinsrw; SSE4.1/AVX fold both
; inserts into a single pcmpeqd + pblendw/vpblendw.
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_x12345x7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 -1, i32 0
  %2 = insertelement <8 x i16> %1, i16 -1, i32 6
  ret <8 x i16> %2
}

; Insert i16 -1 into elements 0, 6 and 15 of a <16 x i16> (lanes in both
; 128-bit halves). AVX1 lowers this to OR with all-ones masks; AVX2/AVX512
; use pcmpeqd + word/dword blends across the two halves.
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [65535,0,0,0]
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX512-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 -1, i32 0
  %2 = insertelement <16 x i16> %1, i16 -1, i32 6
  %3 = insertelement <16 x i16> %2, i16 -1, i32 15
  ret <16 x i16> %3
}

; Insert i8 -1 into elements 0 and 15 of a <16 x i8>.
; Per the CHECK lines: pre-SSE4.1 targets OR in all-ones byte masks from
; constant pool; SSE4.1/AVX use two pinsrb with $255.
define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $255, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 -1, i32 0
  %2 = insertelement <16 x i8> %1, i8 -1, i32 15
  ret <16 x i8> %2
}

; Insert i8 -1 into elements 0, 15, 30 and 31 of a <32 x i8> (lanes in both
; 128-bit halves). Pre-SSE4.1 ORs in constant-pool masks; SSE4.1 uses pinsrb
; on each half; AVX2/AVX512 extract the high half, pinsrb, and re-insert.
define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    orps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
; SSE3-NEXT:    orps %xmm2, %xmm0
; SSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT:    orps %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
; SSSE3-NEXT:    orps %xmm2, %xmm0
; SSSE3-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    orps %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [255,0,0,0]
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl $255, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl $255, %eax
; AVX512-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 -1, i32 0
  %2 = insertelement <32 x i8> %1, i8 -1, i32 15
  %3 = insertelement <32 x i8> %2, i8 -1, i32 30
  %4 = insertelement <32 x i8> %3, i8 -1, i32 31
  ret <32 x i8> %4
}
