xref: /llvm-project/llvm/test/CodeGen/X86/vselect.ll (revision 8630a7ba7c13f43a4dabd64d4ae0495e90f3b644)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6
7; PR66101 - Fold select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
; The select between (add %a, -24) and %a with a compared mask is folded to
; add %a, (and -24, sign-mask) -- no blend instruction is emitted on any target.
8define <4 x i32> @masked_select_const(<4 x i32> %a, <4 x i32> %x, <4 x i32> %y) {
9; SSE-LABEL: masked_select_const:
10; SSE:       # %bb.0:
11; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
12; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13; SSE-NEXT:    paddd %xmm1, %xmm0
14; SSE-NEXT:    retq
15;
16; AVX1-LABEL: masked_select_const:
17; AVX1:       # %bb.0:
18; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
19; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
20; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
21; AVX1-NEXT:    retq
22;
23; AVX2-LABEL: masked_select_const:
24; AVX2:       # %bb.0:
25; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
26; AVX2-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
27; AVX2-NEXT:    vpand %xmm3, %xmm1, %xmm1
28; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
29; AVX2-NEXT:    retq
30  %sub.i = add <4 x i32> %a, <i32 -24, i32 -24, i32 -24, i32 -24>
31  %cmp.i = icmp sgt <4 x i32> %x, %y
32  %sel = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %a
33  ret <4 x i32> %sel
34}
35
36; Verify that we don't emit packed vector shifts instructions if the
37; condition used by the vector select is a vector of constants.
38
; Constant i1 mask <1,0,1,0> -- lowered as two shufps on SSE2 and as a single
; immediate blend on SSE4.1/AVX; no packed shift instructions appear.
39define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
40; SSE2-LABEL: test1:
41; SSE2:       # %bb.0:
42; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
43; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
44; SSE2-NEXT:    retq
45;
46; SSE41-LABEL: test1:
47; SSE41:       # %bb.0:
48; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
49; SSE41-NEXT:    retq
50;
51; AVX-LABEL: test1:
52; AVX:       # %bb.0:
53; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
54; AVX-NEXT:    retq
55  %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
56  ret <4 x float> %1
57}
58
; Mask <1,1,0,0> takes the low half from %a and the high half from %b,
; so a single shuffle (SSE2) or blend (SSE4.1/AVX) suffices.
59define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
60; SSE2-LABEL: test2:
61; SSE2:       # %bb.0:
62; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
63; SSE2-NEXT:    retq
64;
65; SSE41-LABEL: test2:
66; SSE41:       # %bb.0:
67; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
68; SSE41-NEXT:    retq
69;
70; AVX-LABEL: test2:
71; AVX:       # %bb.0:
72; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
73; AVX-NEXT:    retq
74  %1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
75  ret <4 x float> %1
76}
77
; Mask <0,0,1,1> is the mirror of test2 -- low half from %b, high half from %a
; (movsd on SSE2, immediate blend otherwise).
78define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
79; SSE2-LABEL: test3:
80; SSE2:       # %bb.0:
81; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
82; SSE2-NEXT:    retq
83;
84; SSE41-LABEL: test3:
85; SSE41:       # %bb.0:
86; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
87; SSE41-NEXT:    retq
88;
89; AVX-LABEL: test3:
90; AVX:       # %bb.0:
91; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
92; AVX-NEXT:    retq
93  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
94  ret <4 x float> %1
95}
96
; All-false mask -- the select folds to the false operand %b (a plain register move).
97define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
98; SSE-LABEL: test4:
99; SSE:       # %bb.0:
100; SSE-NEXT:    movaps %xmm1, %xmm0
101; SSE-NEXT:    retq
102;
103; AVX-LABEL: test4:
104; AVX:       # %bb.0:
105; AVX-NEXT:    vmovaps %xmm1, %xmm0
106; AVX-NEXT:    retq
107  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
108  ret <4 x float> %1
109}
110
; All-true mask -- the select folds to %a, which is already in xmm0, so no code is emitted.
111define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
112; SSE-LABEL: test5:
113; SSE:       # %bb.0:
114; SSE-NEXT:    retq
115;
116; AVX-LABEL: test5:
117; AVX:       # %bb.0:
118; AVX-NEXT:    retq
119  %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
120  ret <4 x float> %1
121}
122
; Both select operands are %a, so the result is %a regardless of the mask -- folds away.
123define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
124; SSE-LABEL: test6:
125; SSE:       # %bb.0:
126; SSE-NEXT:    retq
127;
128; AVX-LABEL: test6:
129; AVX:       # %bb.0:
130; AVX-NEXT:    retq
131  %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
132  ret <8 x i16> %1
133}
134
; v8i16 mask true for elements 0-3 -- recognized as a half-register blend and
; lowered with 32-bit-element shuffles/blends.
135define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
136; SSE2-LABEL: test7:
137; SSE2:       # %bb.0:
138; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
139; SSE2-NEXT:    retq
140;
141; SSE41-LABEL: test7:
142; SSE41:       # %bb.0:
143; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
144; SSE41-NEXT:    retq
145;
146; AVX-LABEL: test7:
147; AVX:       # %bb.0:
148; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
149; AVX-NEXT:    retq
150  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
151  ret <8 x i16> %1
152}
153
; v8i16 mask true for elements 4-7 -- low half from %b, high half from %a.
154define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
155; SSE2-LABEL: test8:
156; SSE2:       # %bb.0:
157; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
158; SSE2-NEXT:    retq
159;
160; SSE41-LABEL: test8:
161; SSE41:       # %bb.0:
162; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
163; SSE41-NEXT:    retq
164;
165; AVX-LABEL: test8:
166; AVX:       # %bb.0:
167; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
168; AVX-NEXT:    retq
169  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
170  ret <8 x i16> %1
171}
172
; All-false v8i1 mask -- select folds to %b.
173define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
174; SSE-LABEL: test9:
175; SSE:       # %bb.0:
176; SSE-NEXT:    movaps %xmm1, %xmm0
177; SSE-NEXT:    retq
178;
179; AVX-LABEL: test9:
180; AVX:       # %bb.0:
181; AVX-NEXT:    vmovaps %xmm1, %xmm0
182; AVX-NEXT:    retq
183  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
184  ret <8 x i16> %1
185}
186
; All-true v8i1 mask -- select folds to %a (already in xmm0, so no instructions).
187define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
188; SSE-LABEL: test10:
189; SSE:       # %bb.0:
190; SSE-NEXT:    retq
191;
192; AVX-LABEL: test10:
193; AVX:       # %bb.0:
194; AVX-NEXT:    retq
195  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
196  ret <8 x i16> %1
197}
198
; Mixed mask with undef lanes -- SSE2 falls back to and/andn/or with a constant
; word mask; SSE4.1/AVX use an immediate word blend.
199define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
200; SSE2-LABEL: test11:
201; SSE2:       # %bb.0:
202; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,0,0,65535,65535,0]
203; SSE2-NEXT:    andps %xmm2, %xmm0
204; SSE2-NEXT:    andnps %xmm1, %xmm2
205; SSE2-NEXT:    orps %xmm2, %xmm0
206; SSE2-NEXT:    retq
207;
208; SSE41-LABEL: test11:
209; SSE41:       # %bb.0:
210; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
211; SSE41-NEXT:    retq
212;
213; AVX-LABEL: test11:
214; AVX:       # %bb.0:
215; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
216; AVX-NEXT:    retq
217  %1 = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
218  ret <8 x i16> %1
219}
220
; Mask of false/undef lanes is treated as all-false, folding the select to %b.
221define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
222; SSE-LABEL: test12:
223; SSE:       # %bb.0:
224; SSE-NEXT:    movaps %xmm1, %xmm0
225; SSE-NEXT:    retq
226;
227; AVX-LABEL: test12:
228; AVX:       # %bb.0:
229; AVX-NEXT:    vmovaps %xmm1, %xmm0
230; AVX-NEXT:    retq
231  %1 = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
232  ret <8 x i16> %1
233}
234
; All-undef mask -- the backend may pick either operand; here it picks %b.
235define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
236; SSE-LABEL: test13:
237; SSE:       # %bb.0:
238; SSE-NEXT:    movaps %xmm1, %xmm0
239; SSE-NEXT:    retq
240;
241; AVX-LABEL: test13:
242; AVX:       # %bb.0:
243; AVX-NEXT:    vmovaps %xmm1, %xmm0
244; AVX-NEXT:    retq
245  %1 = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
246  ret <8 x i16> %1
247}
248
249; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
; true/undef lanes count as all-ones, so the select folds to %a (no instructions).
250define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
251; SSE-LABEL: test14:
252; SSE:       # %bb.0:
253; SSE-NEXT:    retq
254;
255; AVX-LABEL: test14:
256; AVX:       # %bb.0:
257; AVX-NEXT:    retq
258  %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
259  ret <4 x float> %1
260}
261
; v8i16 version of the true/undef fold -- the select reduces to %a.
262define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
263; SSE-LABEL: test15:
264; SSE:       # %bb.0:
265; SSE-NEXT:    retq
266;
267; AVX-LABEL: test15:
268; AVX:       # %bb.0:
269; AVX-NEXT:    retq
270  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
271  ret <8 x i16> %1
272}
273
274; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
; false/undef lanes count as all-zeros, so the select folds to %b.
275define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
276; SSE-LABEL: test16:
277; SSE:       # %bb.0:
278; SSE-NEXT:    movaps %xmm1, %xmm0
279; SSE-NEXT:    retq
280;
281; AVX-LABEL: test16:
282; AVX:       # %bb.0:
283; AVX-NEXT:    vmovaps %xmm1, %xmm0
284; AVX-NEXT:    retq
285  %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
286  ret <4 x float> %1
287}
288
; v8i16 version of the false/undef fold -- the select reduces to %b.
289define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
290; SSE-LABEL: test17:
291; SSE:       # %bb.0:
292; SSE-NEXT:    movaps %xmm1, %xmm0
293; SSE-NEXT:    retq
294;
295; AVX-LABEL: test17:
296; AVX:       # %bb.0:
297; AVX-NEXT:    vmovaps %xmm1, %xmm0
298; AVX-NEXT:    retq
299  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
300  ret <8 x i16> %1
301}
302
; Only lane 0 comes from %b -- lowered as movss (SSE2) or a single-lane blend.
303define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
304; SSE2-LABEL: test18:
305; SSE2:       # %bb.0:
306; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
307; SSE2-NEXT:    retq
308;
309; SSE41-LABEL: test18:
310; SSE41:       # %bb.0:
311; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
312; SSE41-NEXT:    retq
313;
314; AVX-LABEL: test18:
315; AVX:       # %bb.0:
316; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
317; AVX-NEXT:    retq
318  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
319  ret <4 x float> %1
320}
321
; Integer v4i32 variant of test18 -- the same FP-domain single-lane blend is used.
322define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
323; SSE2-LABEL: test19:
324; SSE2:       # %bb.0:
325; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
326; SSE2-NEXT:    retq
327;
328; SSE41-LABEL: test19:
329; SSE41:       # %bb.0:
330; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
331; SSE41-NEXT:    retq
332;
333; AVX-LABEL: test19:
334; AVX:       # %bb.0:
335; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
336; AVX-NEXT:    retq
337  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
338  ret <4 x i32> %1
339}
340
; v2f64 -- element 0 from %b, element 1 from %a (movsd / low-half blend).
341define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
342; SSE2-LABEL: test20:
343; SSE2:       # %bb.0:
344; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
345; SSE2-NEXT:    retq
346;
347; SSE41-LABEL: test20:
348; SSE41:       # %bb.0:
349; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
350; SSE41-NEXT:    retq
351;
352; AVX-LABEL: test20:
353; AVX:       # %bb.0:
354; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
355; AVX-NEXT:    retq
356  %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
357  ret <2 x double> %1
358}
359
; v2i64 variant of test20 -- same lowering in the FP domain.
360define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
361; SSE2-LABEL: test21:
362; SSE2:       # %bb.0:
363; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
364; SSE2-NEXT:    retq
365;
366; SSE41-LABEL: test21:
367; SSE41:       # %bb.0:
368; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
369; SSE41-NEXT:    retq
370;
371; AVX-LABEL: test21:
372; AVX:       # %bb.0:
373; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
374; AVX-NEXT:    retq
375  %1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
376  ret <2 x i64> %1
377}
378
; Only lane 0 comes from %a -- movss with the operands swapped relative to test18.
379define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
380; SSE2-LABEL: test22:
381; SSE2:       # %bb.0:
382; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
383; SSE2-NEXT:    movaps %xmm1, %xmm0
384; SSE2-NEXT:    retq
385;
386; SSE41-LABEL: test22:
387; SSE41:       # %bb.0:
388; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
389; SSE41-NEXT:    retq
390;
391; AVX-LABEL: test22:
392; AVX:       # %bb.0:
393; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
394; AVX-NEXT:    retq
395  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
396  ret <4 x float> %1
397}
398
; v4i32 variant of test22 -- same single-lane blend lowering.
399define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
400; SSE2-LABEL: test23:
401; SSE2:       # %bb.0:
402; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
403; SSE2-NEXT:    movaps %xmm1, %xmm0
404; SSE2-NEXT:    retq
405;
406; SSE41-LABEL: test23:
407; SSE41:       # %bb.0:
408; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
409; SSE41-NEXT:    retq
410;
411; AVX-LABEL: test23:
412; AVX:       # %bb.0:
413; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
414; AVX-NEXT:    retq
415  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
416  ret <4 x i32> %1
417}
418
; v2f64 -- element 0 from %a, element 1 from %b.
419define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
420; SSE2-LABEL: test24:
421; SSE2:       # %bb.0:
422; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
423; SSE2-NEXT:    retq
424;
425; SSE41-LABEL: test24:
426; SSE41:       # %bb.0:
427; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
428; SSE41-NEXT:    retq
429;
430; AVX-LABEL: test24:
431; AVX:       # %bb.0:
432; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
433; AVX-NEXT:    retq
434  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
435  ret <2 x double> %1
436}
437
; v2i64 variant of test24 -- same lowering.
438define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
439; SSE2-LABEL: test25:
440; SSE2:       # %bb.0:
441; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
442; SSE2-NEXT:    retq
443;
444; SSE41-LABEL: test25:
445; SSE41:       # %bb.0:
446; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
447; SSE41-NEXT:    retq
448;
449; AVX-LABEL: test25:
450; AVX:       # %bb.0:
451; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
452; AVX-NEXT:    retq
453  %1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
454  ret <2 x i64> %1
455}
456
; v16i8 alternating mask has no immediate byte blend -- lowered with a constant
; byte mask via and/andn/or (SSE2) or pblendvb (SSE4.1/AVX).
457define <16 x i8> @test26(<16 x i8> %a, <16 x i8> %b) {
458; SSE2-LABEL: test26:
459; SSE2:       # %bb.0:
460; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
461; SSE2-NEXT:    andps %xmm2, %xmm1
462; SSE2-NEXT:    andnps %xmm0, %xmm2
463; SSE2-NEXT:    orps %xmm1, %xmm2
464; SSE2-NEXT:    movaps %xmm2, %xmm0
465; SSE2-NEXT:    retq
466;
467; SSE41-LABEL: test26:
468; SSE41:       # %bb.0:
469; SSE41-NEXT:    movdqa %xmm0, %xmm2
470; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
471; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
472; SSE41-NEXT:    movdqa %xmm2, %xmm0
473; SSE41-NEXT:    retq
474;
475; AVX1-LABEL: test26:
476; AVX1:       # %bb.0:
477; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
478; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
479; AVX1-NEXT:    retq
480;
481; AVX2-LABEL: test26:
482; AVX2:       # %bb.0:
483; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
484; AVX2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
485; AVX2-NEXT:    retq
486  %1 = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %a, <16 x i8> %b
487  ret <16 x i8> %1
488}
489
; 256-bit v32i8 -- split into two 128-bit masked blends before AVX2 (SSE uses
; and/andn/or per half, AVX1 uses ymm logic ops); AVX2 emits one ymm vpblendvb.
490define <32 x i8> @test27(<32 x i8> %a, <32 x i8> %b) {
491; SSE2-LABEL: test27:
492; SSE2:       # %bb.0:
493; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
494; SSE2-NEXT:    movaps %xmm4, %xmm5
495; SSE2-NEXT:    andnps %xmm2, %xmm5
496; SSE2-NEXT:    andps %xmm4, %xmm0
497; SSE2-NEXT:    orps %xmm5, %xmm0
498; SSE2-NEXT:    andps %xmm4, %xmm1
499; SSE2-NEXT:    andnps %xmm3, %xmm4
500; SSE2-NEXT:    orps %xmm4, %xmm1
501; SSE2-NEXT:    retq
502;
503; SSE41-LABEL: test27:
504; SSE41:       # %bb.0:
505; SSE41-NEXT:    movdqa %xmm0, %xmm4
506; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
507; SSE41-NEXT:    pblendvb %xmm0, %xmm4, %xmm2
508; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
509; SSE41-NEXT:    movdqa %xmm2, %xmm0
510; SSE41-NEXT:    movdqa %xmm3, %xmm1
511; SSE41-NEXT:    retq
512;
513; AVX1-LABEL: test27:
514; AVX1:       # %bb.0:
515; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
516; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
517; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
518; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
519; AVX1-NEXT:    retq
520;
521; AVX2-LABEL: test27:
522; AVX2:       # %bb.0:
523; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
524; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
525; AVX2-NEXT:    retq
526  %1 = select <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true>, <32 x i8> %a, <32 x i8> %b
527  ret <32 x i8> %1
528}
529
; The selects of half-undef shuffles simplify into plain concatenating shuffles
; (movlhps) feeding the fsub -- no blend instructions remain.
530define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
531; SSE-LABEL: select_of_shuffles_0:
532; SSE:       # %bb.0:
533; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
534; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
535; SSE-NEXT:    subps %xmm1, %xmm0
536; SSE-NEXT:    retq
537;
538; AVX-LABEL: select_of_shuffles_0:
539; AVX:       # %bb.0:
540; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
541; AVX-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
542; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
543; AVX-NEXT:    retq
544  %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
545  %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
546  %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
547  %4 = shufflevector <2 x float> %b0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
548  %5 = shufflevector <2 x float> %b1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
549  %6 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %5, <4 x float> %4
550  %7 = fsub <4 x float> %3, %6
551  ret <4 x float> %7
552}
553
554; PR20677
; PR20677 -- illegal v16f64 select with a half-true/half-false constant mask is
; split during legalization and folds to register/stack copies (sret via rdi on SSE).
555define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
556; SSE-LABEL: select_illegal:
557; SSE:       # %bb.0:
558; SSE-NEXT:    movq %rdi, %rax
559; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm4
560; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm5
561; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm6
562; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm7
563; SSE-NEXT:    movaps %xmm7, 112(%rdi)
564; SSE-NEXT:    movaps %xmm6, 96(%rdi)
565; SSE-NEXT:    movaps %xmm5, 80(%rdi)
566; SSE-NEXT:    movaps %xmm4, 64(%rdi)
567; SSE-NEXT:    movaps %xmm3, 48(%rdi)
568; SSE-NEXT:    movaps %xmm2, 32(%rdi)
569; SSE-NEXT:    movaps %xmm1, 16(%rdi)
570; SSE-NEXT:    movaps %xmm0, (%rdi)
571; SSE-NEXT:    retq
572;
573; AVX-LABEL: select_illegal:
574; AVX:       # %bb.0:
575; AVX-NEXT:    vmovaps %ymm7, %ymm3
576; AVX-NEXT:    vmovaps %ymm6, %ymm2
577; AVX-NEXT:    retq
578  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
579  ret <16 x double> %sel
580}
581
582; Make sure we can optimize the condition MSB when it is used by 2 selects.
583; The v2i1 here will be passed as v2i64 and we will emit a sign_extend_inreg to fill the upper bits.
584; We should be able to remove the sra from the sign_extend_inreg to leave only shl.
; Shared condition feeding two selects -- the mask is materialized once and
; reused; with SSE4.1/AVX only the shl (psllq $63) remains, the sra is removed.
585define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
586; SSE2-LABEL: shrunkblend_2uses:
587; SSE2:       # %bb.0:
588; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
589; SSE2-NEXT:    pslld $31, %xmm0
590; SSE2-NEXT:    psrad $31, %xmm0
591; SSE2-NEXT:    movdqa %xmm0, %xmm5
592; SSE2-NEXT:    pandn %xmm2, %xmm5
593; SSE2-NEXT:    pand %xmm0, %xmm1
594; SSE2-NEXT:    por %xmm1, %xmm5
595; SSE2-NEXT:    pand %xmm0, %xmm3
596; SSE2-NEXT:    pandn %xmm4, %xmm0
597; SSE2-NEXT:    por %xmm3, %xmm0
598; SSE2-NEXT:    paddq %xmm5, %xmm0
599; SSE2-NEXT:    retq
600;
601; SSE41-LABEL: shrunkblend_2uses:
602; SSE41:       # %bb.0:
603; SSE41-NEXT:    psllq $63, %xmm0
604; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
605; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm4
606; SSE41-NEXT:    paddq %xmm2, %xmm4
607; SSE41-NEXT:    movdqa %xmm4, %xmm0
608; SSE41-NEXT:    retq
609;
610; AVX-LABEL: shrunkblend_2uses:
611; AVX:       # %bb.0:
612; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
613; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
614; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
615; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
616; AVX-NEXT:    retq
617  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
618  %y = select <2 x i1> %cond, <2 x i64> %c, <2 x i64> %d
619  %z = add <2 x i64> %x, %y
620  ret <2 x i64> %z
621}
622
623; Similar to above, but condition has a use that isn't a condition of a vselect so we can't optimize.
; The condition also feeds a sext with a non-select user, so the full sign
; extension must still be materialized (psrad / vpcmpgtq) for the add.
624define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
625; SSE2-LABEL: shrunkblend_nonvselectuse:
626; SSE2:       # %bb.0:
627; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2]
628; SSE2-NEXT:    pslld $31, %xmm3
629; SSE2-NEXT:    psrad $31, %xmm3
630; SSE2-NEXT:    movdqa %xmm3, %xmm0
631; SSE2-NEXT:    pandn %xmm2, %xmm0
632; SSE2-NEXT:    pand %xmm3, %xmm1
633; SSE2-NEXT:    por %xmm1, %xmm0
634; SSE2-NEXT:    paddq %xmm3, %xmm0
635; SSE2-NEXT:    retq
636;
637; SSE41-LABEL: shrunkblend_nonvselectuse:
638; SSE41:       # %bb.0:
639; SSE41-NEXT:    psllq $63, %xmm0
640; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
641; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
642; SSE41-NEXT:    psrad $31, %xmm0
643; SSE41-NEXT:    paddq %xmm2, %xmm0
644; SSE41-NEXT:    retq
645;
646; AVX-LABEL: shrunkblend_nonvselectuse:
647; AVX:       # %bb.0:
648; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
649; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
650; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
651; AVX-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
652; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
653; AVX-NEXT:    retq
654  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
655  %y = sext <2 x i1> %cond to <2 x i64>
656  %z = add <2 x i64> %x, %y
657  ret <2 x i64> %z
658}
659
660; This turns into a SHRUNKBLEND with SSE4 or later, and via
661; late shuffle magic, both sides of the blend are the same
662; value. If that is not simplified before isel, it can fail
663; to match (crash).
664
; Regression test -- after shuffle combining both blend inputs can become the
; same value; that must be simplified before isel or instruction selection crashes.
665define <2 x i32> @simplify_select(i32 %x, <2 x i1> %z) {
666; SSE2-LABEL: simplify_select:
667; SSE2:       # %bb.0:
668; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
669; SSE2-NEXT:    pslld $31, %xmm0
670; SSE2-NEXT:    psrad $31, %xmm0
671; SSE2-NEXT:    movd %edi, %xmm1
672; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
673; SSE2-NEXT:    por %xmm1, %xmm2
674; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0,0]
675; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[1,1]
676; SSE2-NEXT:    pand %xmm0, %xmm2
677; SSE2-NEXT:    pandn %xmm1, %xmm0
678; SSE2-NEXT:    por %xmm2, %xmm0
679; SSE2-NEXT:    retq
680;
681; SSE41-LABEL: simplify_select:
682; SSE41:       # %bb.0:
683; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
684; SSE41-NEXT:    pslld $31, %xmm0
685; SSE41-NEXT:    movd %edi, %xmm1
686; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
687; SSE41-NEXT:    por %xmm1, %xmm2
688; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
689; SSE41-NEXT:    pinsrd $1, %edi, %xmm1
690; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1
691; SSE41-NEXT:    movaps %xmm1, %xmm0
692; SSE41-NEXT:    retq
693;
694; AVX-LABEL: simplify_select:
695; AVX:       # %bb.0:
696; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
697; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
698; AVX-NEXT:    vmovd %edi, %xmm1
699; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
700; AVX-NEXT:    vpor %xmm1, %xmm2, %xmm1
701; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
702; AVX-NEXT:    vpinsrd $1, %edi, %xmm2, %xmm2
703; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
704; AVX-NEXT:    retq
705  %a = insertelement <2 x i32> <i32 0, i32 undef>, i32 %x, i32 1
706  %b = insertelement <2 x i32> <i32 undef, i32 0>, i32 %x, i32 0
707  %y = or <2 x i32> %a, %b
708  %p16 = extractelement <2 x i32> %y, i32 1
709  %p17 = insertelement <2 x i32> undef, i32 %p16, i32 0
710  %p18 = insertelement <2 x i32> %p17, i32 %x, i32 1
711  %r = select <2 x i1> %z, <2 x i32> %y, <2 x i32> %p18
712  ret <2 x i32> %r
713}
714
715; Test to make sure we don't try to insert a new setcc to swap the operands
716; of select with all zeros LHS if the setcc has additional users.
; The setcc has two select users, so no new (swapped) setcc is inserted for the
; all-zeros-LHS select; both stores use plain and/andn masking of one compare.
717define void @vselect_allzeros_LHS_multiple_use_setcc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, ptr %p1, ptr %p2) {
718; SSE2-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
719; SSE2:       # %bb.0:
720; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8]
721; SSE2-NEXT:    pand %xmm3, %xmm0
722; SSE2-NEXT:    pcmpeqd %xmm3, %xmm0
723; SSE2-NEXT:    movdqa %xmm0, %xmm3
724; SSE2-NEXT:    pandn %xmm1, %xmm3
725; SSE2-NEXT:    pand %xmm2, %xmm0
726; SSE2-NEXT:    movdqa %xmm3, (%rdi)
727; SSE2-NEXT:    movdqa %xmm0, (%rsi)
728; SSE2-NEXT:    retq
729;
730; SSE41-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
731; SSE41:       # %bb.0:
732; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
733; SSE41-NEXT:    pand %xmm3, %xmm0
734; SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
735; SSE41-NEXT:    movdqa %xmm0, %xmm3
736; SSE41-NEXT:    pandn %xmm1, %xmm3
737; SSE41-NEXT:    pand %xmm2, %xmm0
738; SSE41-NEXT:    movdqa %xmm3, (%rdi)
739; SSE41-NEXT:    movdqa %xmm0, (%rsi)
740; SSE41-NEXT:    retq
741;
742; AVX-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
743; AVX:       # %bb.0:
744; AVX-NEXT:    vpmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
745; AVX-NEXT:    vpand %xmm3, %xmm0, %xmm0
746; AVX-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
747; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm1
748; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
749; AVX-NEXT:    vmovdqa %xmm1, (%rdi)
750; AVX-NEXT:    vmovdqa %xmm0, (%rsi)
751; AVX-NEXT:    retq
752  %and = and <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
753  %cond = icmp ne <4 x i32> %and, zeroinitializer
754  %sel1 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %y
755  %sel2 = select <4 x i1> %cond, <4 x i32> %z, <4 x i32> zeroinitializer
756  %sel1 is stored first, then %sel2; no extra setcc appears in the checks above.
760
761; This test case previously crashed after r363802, r363850, and r363856 due
762; any_extend_vector_inreg not being handled by the X86 backend.
; Regression test for any_extend_vector_inreg handling (r363802 etc.) -- only
; element 0 of the wide select result is used, extracted as a scalar mask*32768.
763define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) {
764; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
765; SSE:       # %bb.0:
766; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
767; SSE-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
768; SSE-NEXT:    movd %xmm0, %eax
769; SSE-NEXT:    andl $1, %eax
770; SSE-NEXT:    shll $15, %eax
771; SSE-NEXT:    retq
772;
773; AVX1-LABEL: vselect_any_extend_vector_inreg_crash:
774; AVX1:       # %bb.0:
775; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
776; AVX1-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
777; AVX1-NEXT:    vmovd %xmm0, %eax
778; AVX1-NEXT:    andl $1, %eax
779; AVX1-NEXT:    shll $15, %eax
780; AVX1-NEXT:    retq
781;
782; AVX2-LABEL: vselect_any_extend_vector_inreg_crash:
783; AVX2:       # %bb.0:
784; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
785; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [49,49,49,49]
786; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
787; AVX2-NEXT:    vmovd %xmm0, %eax
788; AVX2-NEXT:    andl $1, %eax
789; AVX2-NEXT:    shll $15, %eax
790; AVX2-NEXT:    retq
7910:
792  %1 = load <8 x i8>, ptr %x
793  %2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
794  %3 = select <8 x i1> %2, <8 x i64> <i64 32768, i64 16384, i64 8192, i64 4096, i64 2048, i64 1024, i64 512, i64 256>, <8 x i64> zeroinitializer
795  %4 = extractelement <8 x i64> %3, i32 0
796  ret i64 %4
797}
798
799; Tests the scalarizeBinOp code in DAGCombiner
; Exercises DAGCombiner's scalarizeBinOp path on a v1i1 select; the result is
; only consumed after an unreachable-from-here edge, so the loop body is empty.
800define void @scalarize_binop(<1 x i1> %a) {
801; SSE-LABEL: scalarize_binop:
802; SSE:       # %bb.0: # %bb0
803; SSE-NEXT:    .p2align 4
804; SSE-NEXT:  .LBB35_1: # %bb1
805; SSE-NEXT:    # =>This Inner Loop Header: Depth=1
806; SSE-NEXT:    jmp .LBB35_1
807;
808; AVX-LABEL: scalarize_binop:
809; AVX:       # %bb.0: # %bb0
810; AVX-NEXT:    .p2align 4
811; AVX-NEXT:  .LBB35_1: # %bb1
812; AVX-NEXT:    # =>This Inner Loop Header: Depth=1
813; AVX-NEXT:    jmp .LBB35_1
814bb0:
815  br label %bb1
816
817bb1:
818  %b = select <1 x i1> %a, <1 x i1> zeroinitializer, <1 x i1> splat (i1 true)
819  br label %bb2
820
821bb2:
822  %c = extractelement <1 x i1> %b, i32 0
823  br label %bb1
824}
825