xref: /llvm-project/llvm/test/CodeGen/X86/vector-blend.ll (revision 8b43c1be23119c1024bed0a8ce392bc73727e2e2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
7
8; AVX128 tests:
9
; Constant mask <1,0,1,0>: lanes 0,2 from %v1, lanes 1,3 from %v2.
; SSE4.1/AVX should fold this to a single immediate blendps; pre-SSE4.1 needs two shufps.
10define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
11; SSE2-LABEL: vsel_float:
12; SSE2:       # %bb.0: # %entry
13; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
14; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
15; SSE2-NEXT:    retq
16;
17; SSSE3-LABEL: vsel_float:
18; SSSE3:       # %bb.0: # %entry
19; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
20; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
21; SSSE3-NEXT:    retq
22;
23; SSE41-LABEL: vsel_float:
24; SSE41:       # %bb.0: # %entry
25; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
26; SSE41-NEXT:    retq
27;
28; AVX-LABEL: vsel_float:
29; AVX:       # %bb.0: # %entry
30; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
31; AVX-NEXT:    retq
32entry:
33  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
34  ret <4 x float> %vsel
35}
36
; Mask <1,0,0,0>: only lane 0 comes from %v1 — the classic movss/blendps-imm1 pattern.
37define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
38; SSE2-LABEL: vsel_float2:
39; SSE2:       # %bb.0: # %entry
40; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
41; SSE2-NEXT:    movaps %xmm1, %xmm0
42; SSE2-NEXT:    retq
43;
44; SSSE3-LABEL: vsel_float2:
45; SSSE3:       # %bb.0: # %entry
46; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
47; SSSE3-NEXT:    movaps %xmm1, %xmm0
48; SSSE3-NEXT:    retq
49;
50; SSE41-LABEL: vsel_float2:
51; SSE41:       # %bb.0: # %entry
52; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
53; SSE41-NEXT:    retq
54;
55; AVX-LABEL: vsel_float2:
56; AVX:       # %bb.0: # %entry
57; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
58; AVX-NEXT:    retq
59entry:
60  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
61  ret <4 x float> %vsel
62}
63
; Sub-128-bit select: <4 x i8> with mask <1,1,0,1>. SSE2 falls back to and/andn/or
; with a byte mask; SSE4.1/AVX use pblendvb with a constant mask register.
64define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
65; SSE2-LABEL: vsel_4xi8:
66; SSE2:       # %bb.0: # %entry
67; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
68; SSE2-NEXT:    andps %xmm2, %xmm0
69; SSE2-NEXT:    andnps %xmm1, %xmm2
70; SSE2-NEXT:    orps %xmm2, %xmm0
71; SSE2-NEXT:    retq
72;
73; SSSE3-LABEL: vsel_4xi8:
74; SSSE3:       # %bb.0: # %entry
75; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
76; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,5,6,u,u,u,u,u,u,u,u,u,u,u,u]
77; SSSE3-NEXT:    retq
78;
79; SSE41-LABEL: vsel_4xi8:
80; SSE41:       # %bb.0: # %entry
81; SSE41-NEXT:    movdqa %xmm0, %xmm2
82; SSE41-NEXT:    movss {{.*#+}} xmm0 = [255,255,0,255,0,0,0,0,0,0,0,0,0,0,0,0]
83; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
84; SSE41-NEXT:    movdqa %xmm1, %xmm0
85; SSE41-NEXT:    retq
86;
87; AVX-LABEL: vsel_4xi8:
88; AVX:       # %bb.0: # %entry
89; AVX-NEXT:    vmovd {{.*#+}} xmm2 = [255,255,0,255,0,0,0,0,0,0,0,0,0,0,0,0]
90; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
91; AVX-NEXT:    retq
92entry:
93  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
94  ret <4 x i8> %vsel
95}
96
; <4 x i16> select with mask <1,0,1,1>. SSE4.1/AVX reach a single pblendw;
; earlier subtargets use the and/andn/or mask sequence.
97define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
98; SSE2-LABEL: vsel_4xi16:
99; SSE2:       # %bb.0: # %entry
100; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
101; SSE2-NEXT:    andps %xmm2, %xmm0
102; SSE2-NEXT:    andnps %xmm1, %xmm2
103; SSE2-NEXT:    orps %xmm2, %xmm0
104; SSE2-NEXT:    retq
105;
106; SSSE3-LABEL: vsel_4xi16:
107; SSSE3:       # %bb.0: # %entry
108; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
109; SSSE3-NEXT:    andps %xmm2, %xmm0
110; SSSE3-NEXT:    andnps %xmm1, %xmm2
111; SSSE3-NEXT:    orps %xmm2, %xmm0
112; SSSE3-NEXT:    retq
113;
114; SSE41-LABEL: vsel_4xi16:
115; SSE41:       # %bb.0: # %entry
116; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
117; SSE41-NEXT:    retq
118;
119; AVX-LABEL: vsel_4xi16:
120; AVX:       # %bb.0: # %entry
121; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
122; AVX-NEXT:    retq
123entry:
124  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
125  ret <4 x i16> %vsel
126}
127
; Integer twin of vsel_float (mask <1,0,1,0>). SSE4.1/AVX still emit the FP-domain
; blendps; SSE2 synthesizes it from pshufd + punpckldq.
128define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
129; SSE2-LABEL: vsel_i32:
130; SSE2:       # %bb.0: # %entry
131; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
132; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
133; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
134; SSE2-NEXT:    retq
135;
136; SSSE3-LABEL: vsel_i32:
137; SSSE3:       # %bb.0: # %entry
138; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
139; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
140; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
141; SSSE3-NEXT:    retq
142;
143; SSE41-LABEL: vsel_i32:
144; SSE41:       # %bb.0: # %entry
145; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
146; SSE41-NEXT:    retq
147;
148; AVX-LABEL: vsel_i32:
149; AVX:       # %bb.0: # %entry
150; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
151; AVX-NEXT:    retq
152entry:
153  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
154  ret <4 x i32> %vsel
155}
156
; <2 x double> mask <1,0>: low element from %v1, high from %v2 — one shufps/blendps
; on every subtarget.
157define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
158; SSE2-LABEL: vsel_double:
159; SSE2:       # %bb.0: # %entry
160; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
161; SSE2-NEXT:    retq
162;
163; SSSE3-LABEL: vsel_double:
164; SSSE3:       # %bb.0: # %entry
165; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
166; SSSE3-NEXT:    retq
167;
168; SSE41-LABEL: vsel_double:
169; SSE41:       # %bb.0: # %entry
170; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
171; SSE41-NEXT:    retq
172;
173; AVX-LABEL: vsel_double:
174; AVX:       # %bb.0: # %entry
175; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
176; AVX-NEXT:    retq
177entry:
178  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
179  ret <2 x double> %vsel
180}
181
; Integer twin of vsel_double: <2 x i64> mask <1,0>, same single-shuffle codegen.
182define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
183; SSE2-LABEL: vsel_i64:
184; SSE2:       # %bb.0: # %entry
185; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
186; SSE2-NEXT:    retq
187;
188; SSSE3-LABEL: vsel_i64:
189; SSSE3:       # %bb.0: # %entry
190; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
191; SSSE3-NEXT:    retq
192;
193; SSE41-LABEL: vsel_i64:
194; SSE41:       # %bb.0: # %entry
195; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
196; SSE41-NEXT:    retq
197;
198; AVX-LABEL: vsel_i64:
199; AVX:       # %bb.0: # %entry
200; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
201; AVX-NEXT:    retq
202entry:
203  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
204  ret <2 x i64> %vsel
205}
206
; <8 x i16> mask true at lanes 0 and 4; SSE4.1/AVX reach one pblendw, older
; subtargets use the inverted and/andn/or mask trick.
207define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
208; SSE2-LABEL: vsel_8xi16:
209; SSE2:       # %bb.0: # %entry
210; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
211; SSE2-NEXT:    andps %xmm2, %xmm1
212; SSE2-NEXT:    andnps %xmm0, %xmm2
213; SSE2-NEXT:    orps %xmm1, %xmm2
214; SSE2-NEXT:    movaps %xmm2, %xmm0
215; SSE2-NEXT:    retq
216;
217; SSSE3-LABEL: vsel_8xi16:
218; SSSE3:       # %bb.0: # %entry
219; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
220; SSSE3-NEXT:    andps %xmm2, %xmm1
221; SSSE3-NEXT:    andnps %xmm0, %xmm2
222; SSSE3-NEXT:    orps %xmm1, %xmm2
223; SSSE3-NEXT:    movaps %xmm2, %xmm0
224; SSSE3-NEXT:    retq
225;
226; SSE41-LABEL: vsel_8xi16:
227; SSE41:       # %bb.0: # %entry
228; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
229; SSE41-NEXT:    retq
230;
231; AVX-LABEL: vsel_8xi16:
232; AVX:       # %bb.0: # %entry
233; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
234; AVX-NEXT:    retq
235entry:
236  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
237  ret <8 x i16> %vsel
238}
239
; <16 x i8> mask true at every 4th byte. There is no byte-granular immediate blend,
; so SSE4.1/AVX need pblendvb with a constant mask; SSSE3 uses two pshufb + por.
240define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
241; SSE2-LABEL: vsel_i8:
242; SSE2:       # %bb.0: # %entry
243; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
244; SSE2-NEXT:    andps %xmm2, %xmm1
245; SSE2-NEXT:    andnps %xmm0, %xmm2
246; SSE2-NEXT:    orps %xmm1, %xmm2
247; SSE2-NEXT:    movaps %xmm2, %xmm0
248; SSE2-NEXT:    retq
249;
250; SSSE3-LABEL: vsel_i8:
251; SSSE3:       # %bb.0: # %entry
252; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
253; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
254; SSSE3-NEXT:    por %xmm1, %xmm0
255; SSSE3-NEXT:    retq
256;
257; SSE41-LABEL: vsel_i8:
258; SSE41:       # %bb.0: # %entry
259; SSE41-NEXT:    movdqa %xmm0, %xmm2
260; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
261; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
262; SSE41-NEXT:    movdqa %xmm2, %xmm0
263; SSE41-NEXT:    retq
264;
265; AVX1-LABEL: vsel_i8:
266; AVX1:       # %bb.0: # %entry
267; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
268; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
269; AVX1-NEXT:    retq
270;
271; AVX2-LABEL: vsel_i8:
272; AVX2:       # %bb.0: # %entry
273; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
274; AVX2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
275; AVX2-NEXT:    retq
276entry:
277  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
278  ret <16 x i8> %vsel
279}
280
281
282; AVX256 tests:
283
; 256-bit select, mask true at lanes 0 and 4. AVX gets one ymm vblendps; SSE splits
; into two xmm halves, each reducing to the movss/blendps lane-0 pattern.
284define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
285; SSE2-LABEL: vsel_float8:
286; SSE2:       # %bb.0: # %entry
287; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
288; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
289; SSE2-NEXT:    movaps %xmm2, %xmm0
290; SSE2-NEXT:    movaps %xmm3, %xmm1
291; SSE2-NEXT:    retq
292;
293; SSSE3-LABEL: vsel_float8:
294; SSSE3:       # %bb.0: # %entry
295; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
296; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
297; SSSE3-NEXT:    movaps %xmm2, %xmm0
298; SSSE3-NEXT:    movaps %xmm3, %xmm1
299; SSSE3-NEXT:    retq
300;
301; SSE41-LABEL: vsel_float8:
302; SSE41:       # %bb.0: # %entry
303; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
304; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
305; SSE41-NEXT:    retq
306;
307; AVX-LABEL: vsel_float8:
308; AVX:       # %bb.0: # %entry
309; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
310; AVX-NEXT:    retq
311entry:
312  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
313  ret <8 x float> %vsel
314}
315
; Integer twin of vsel_float8: identical codegen expected (FP-domain blends for i32).
316define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
317; SSE2-LABEL: vsel_i328:
318; SSE2:       # %bb.0: # %entry
319; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
320; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
321; SSE2-NEXT:    movaps %xmm2, %xmm0
322; SSE2-NEXT:    movaps %xmm3, %xmm1
323; SSE2-NEXT:    retq
324;
325; SSSE3-LABEL: vsel_i328:
326; SSSE3:       # %bb.0: # %entry
327; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
328; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
329; SSSE3-NEXT:    movaps %xmm2, %xmm0
330; SSSE3-NEXT:    movaps %xmm3, %xmm1
331; SSSE3-NEXT:    retq
332;
333; SSE41-LABEL: vsel_i328:
334; SSE41:       # %bb.0: # %entry
335; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
336; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
337; SSE41-NEXT:    retq
338;
339; AVX-LABEL: vsel_i328:
340; AVX:       # %bb.0: # %entry
341; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
342; AVX-NEXT:    retq
343entry:
344  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
345  ret <8 x i32> %vsel
346}
347
; 512-bit <8 x double> select, mask true at elements 0 and 4 (the low double of
; xmm0/xmm2 resp. ymm0/ymm1). Halves 1 and 3 come wholly from %v2, so SSE just
; copies registers; only halves 0 and 2 need a shuffle/blend.
348define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
349; SSE2-LABEL: vsel_double8:
350; SSE2:       # %bb.0: # %entry
351; SSE2-NEXT:    movaps %xmm7, %xmm3
352; SSE2-NEXT:    movaps %xmm5, %xmm1
353; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
354; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
355; SSE2-NEXT:    retq
356;
357; SSSE3-LABEL: vsel_double8:
358; SSSE3:       # %bb.0: # %entry
359; SSSE3-NEXT:    movaps %xmm7, %xmm3
360; SSSE3-NEXT:    movaps %xmm5, %xmm1
361; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
362; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
363; SSSE3-NEXT:    retq
364;
365; SSE41-LABEL: vsel_double8:
366; SSE41:       # %bb.0: # %entry
367; SSE41-NEXT:    movaps %xmm7, %xmm3
368; SSE41-NEXT:    movaps %xmm5, %xmm1
369; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
370; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
371; SSE41-NEXT:    retq
372;
373; AVX-LABEL: vsel_double8:
374; AVX:       # %bb.0: # %entry
375; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
376; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
377; AVX-NEXT:    retq
378entry:
379  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
380  ret <8 x double> %vsel
381}
382
; Integer twin of vsel_double8: same mask, same expected register-copy + blend codegen.
383define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
384; SSE2-LABEL: vsel_i648:
385; SSE2:       # %bb.0: # %entry
386; SSE2-NEXT:    movaps %xmm7, %xmm3
387; SSE2-NEXT:    movaps %xmm5, %xmm1
388; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
389; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
390; SSE2-NEXT:    retq
391;
392; SSSE3-LABEL: vsel_i648:
393; SSSE3:       # %bb.0: # %entry
394; SSSE3-NEXT:    movaps %xmm7, %xmm3
395; SSSE3-NEXT:    movaps %xmm5, %xmm1
396; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
397; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
398; SSSE3-NEXT:    retq
399;
400; SSE41-LABEL: vsel_i648:
401; SSE41:       # %bb.0: # %entry
402; SSE41-NEXT:    movaps %xmm7, %xmm3
403; SSE41-NEXT:    movaps %xmm5, %xmm1
404; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
405; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
406; SSE41-NEXT:    retq
407;
408; AVX-LABEL: vsel_i648:
409; AVX:       # %bb.0: # %entry
410; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
411; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
412; AVX-NEXT:    retq
413entry:
414  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
415  ret <8 x i64> %vsel
416}
417
; <4 x double> mask <1,0,1,0>: one ymm vblendps on AVX, two xmm shuffles/blends on SSE.
418define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
419; SSE2-LABEL: vsel_double4:
420; SSE2:       # %bb.0: # %entry
421; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
422; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
423; SSE2-NEXT:    retq
424;
425; SSSE3-LABEL: vsel_double4:
426; SSSE3:       # %bb.0: # %entry
427; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
428; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
429; SSSE3-NEXT:    retq
430;
431; SSE41-LABEL: vsel_double4:
432; SSE41:       # %bb.0: # %entry
433; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
434; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
435; SSE41-NEXT:    retq
436;
437; AVX-LABEL: vsel_double4:
438; AVX:       # %bb.0: # %entry
439; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
440; AVX-NEXT:    retq
441entry:
442  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
443  ret <4 x double> %vsel
444}
445
; max(x, y) built from `fcmp oge` + select with a variable (non-constant) mask:
; must lower to cmplepd + blendvpd (SSE4.1/AVX) or cmp + and/andn/or (SSE2/SSSE3),
; never to an immediate blend.
446define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
447; SSE2-LABEL: testa:
448; SSE2:       # %bb.0: # %entry
449; SSE2-NEXT:    movapd %xmm1, %xmm2
450; SSE2-NEXT:    cmplepd %xmm0, %xmm2
451; SSE2-NEXT:    andpd %xmm2, %xmm0
452; SSE2-NEXT:    andnpd %xmm1, %xmm2
453; SSE2-NEXT:    orpd %xmm2, %xmm0
454; SSE2-NEXT:    retq
455;
456; SSSE3-LABEL: testa:
457; SSSE3:       # %bb.0: # %entry
458; SSSE3-NEXT:    movapd %xmm1, %xmm2
459; SSSE3-NEXT:    cmplepd %xmm0, %xmm2
460; SSSE3-NEXT:    andpd %xmm2, %xmm0
461; SSSE3-NEXT:    andnpd %xmm1, %xmm2
462; SSSE3-NEXT:    orpd %xmm2, %xmm0
463; SSSE3-NEXT:    retq
464;
465; SSE41-LABEL: testa:
466; SSE41:       # %bb.0: # %entry
467; SSE41-NEXT:    movapd %xmm0, %xmm2
468; SSE41-NEXT:    movapd %xmm1, %xmm0
469; SSE41-NEXT:    cmplepd %xmm2, %xmm0
470; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
471; SSE41-NEXT:    movapd %xmm1, %xmm0
472; SSE41-NEXT:    retq
473;
474; AVX-LABEL: testa:
475; AVX:       # %bb.0: # %entry
476; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm2
477; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
478; AVX-NEXT:    retq
479entry:
480  %max_is_x = fcmp oge <2 x double> %x, %y
481  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
482  ret <2 x double> %max
483}
484
; min(x, y) via `fcmp ult` + select: the unordered predicate inverts to cmpnlepd,
; then the same variable-mask blend lowering as testa.
485define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
486; SSE2-LABEL: testb:
487; SSE2:       # %bb.0: # %entry
488; SSE2-NEXT:    movapd %xmm1, %xmm2
489; SSE2-NEXT:    cmpnlepd %xmm0, %xmm2
490; SSE2-NEXT:    andpd %xmm2, %xmm0
491; SSE2-NEXT:    andnpd %xmm1, %xmm2
492; SSE2-NEXT:    orpd %xmm2, %xmm0
493; SSE2-NEXT:    retq
494;
495; SSSE3-LABEL: testb:
496; SSSE3:       # %bb.0: # %entry
497; SSSE3-NEXT:    movapd %xmm1, %xmm2
498; SSSE3-NEXT:    cmpnlepd %xmm0, %xmm2
499; SSSE3-NEXT:    andpd %xmm2, %xmm0
500; SSSE3-NEXT:    andnpd %xmm1, %xmm2
501; SSSE3-NEXT:    orpd %xmm2, %xmm0
502; SSSE3-NEXT:    retq
503;
504; SSE41-LABEL: testb:
505; SSE41:       # %bb.0: # %entry
506; SSE41-NEXT:    movapd %xmm0, %xmm2
507; SSE41-NEXT:    movapd %xmm1, %xmm0
508; SSE41-NEXT:    cmpnlepd %xmm2, %xmm0
509; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
510; SSE41-NEXT:    movapd %xmm1, %xmm0
511; SSE41-NEXT:    retq
512;
513; AVX-LABEL: testb:
514; AVX:       # %bb.0: # %entry
515; AVX-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm2
516; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
517; AVX-NEXT:    retq
518entry:
519  %min_is_x = fcmp ult <2 x double> %x, %y
520  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
521  ret <2 x double> %min
522}
523
524; If we can figure out a blend has a constant mask, we should emit the
525; blend instruction with an immediate mask
; Constant mask <0,0,1,0>: only element 2 from %xy. AVX should use an immediate
; vblendps rather than the variable-mask vblendvpd the name alludes to.
526define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
527; SSE2-LABEL: constant_blendvpd_avx:
528; SSE2:       # %bb.0: # %entry
529; SSE2-NEXT:    movaps %xmm2, %xmm0
530; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
531; SSE2-NEXT:    retq
532;
533; SSSE3-LABEL: constant_blendvpd_avx:
534; SSSE3:       # %bb.0: # %entry
535; SSSE3-NEXT:    movaps %xmm2, %xmm0
536; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
537; SSSE3-NEXT:    retq
538;
539; SSE41-LABEL: constant_blendvpd_avx:
540; SSE41:       # %bb.0: # %entry
541; SSE41-NEXT:    movaps %xmm2, %xmm0
542; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
543; SSE41-NEXT:    retq
544;
545; AVX-LABEL: constant_blendvpd_avx:
546; AVX:       # %bb.0: # %entry
547; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
548; AVX-NEXT:    retq
549entry:
550  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
551  ret <4 x double> %select
552}
553
; Constant mask true at lanes 3 and 7: one immediate blend per 128-bit half on
; SSE4.1, a single ymm vblendps on AVX; SSE2/SSSE3 fall back to shufps chains.
554define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
555; SSE2-LABEL: constant_blendvps_avx:
556; SSE2:       # %bb.0: # %entry
557; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
558; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
559; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
560; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
561; SSE2-NEXT:    movaps %xmm2, %xmm0
562; SSE2-NEXT:    movaps %xmm3, %xmm1
563; SSE2-NEXT:    retq
564;
565; SSSE3-LABEL: constant_blendvps_avx:
566; SSSE3:       # %bb.0: # %entry
567; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
568; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
569; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
570; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
571; SSSE3-NEXT:    movaps %xmm2, %xmm0
572; SSSE3-NEXT:    movaps %xmm3, %xmm1
573; SSSE3-NEXT:    retq
574;
575; SSE41-LABEL: constant_blendvps_avx:
576; SSE41:       # %bb.0: # %entry
577; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
578; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
579; SSE41-NEXT:    retq
580;
581; AVX-LABEL: constant_blendvps_avx:
582; AVX:       # %bb.0: # %entry
583; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
584; AVX-NEXT:    retq
585entry:
586  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
587  ret <8 x float> %select
588}
589
; 32-byte constant blend with a byte-granular, 8-byte-periodic mask. No immediate
; byte blend exists, so the mask is materialized (broadcast on AVX1/AVX2) and
; applied via pblendvb / and-andn-or / paired pshufb.
590define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
591; SSE2-LABEL: constant_pblendvb_avx2:
592; SSE2:       # %bb.0: # %entry
593; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
594; SSE2-NEXT:    movaps %xmm4, %xmm5
595; SSE2-NEXT:    andnps %xmm0, %xmm5
596; SSE2-NEXT:    andps %xmm4, %xmm2
597; SSE2-NEXT:    orps %xmm2, %xmm5
598; SSE2-NEXT:    andps %xmm4, %xmm3
599; SSE2-NEXT:    andnps %xmm1, %xmm4
600; SSE2-NEXT:    orps %xmm3, %xmm4
601; SSE2-NEXT:    movaps %xmm5, %xmm0
602; SSE2-NEXT:    movaps %xmm4, %xmm1
603; SSE2-NEXT:    retq
604;
605; SSSE3-LABEL: constant_pblendvb_avx2:
606; SSSE3:       # %bb.0: # %entry
607; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
608; SSSE3-NEXT:    pshufb %xmm4, %xmm0
609; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
610; SSSE3-NEXT:    pshufb %xmm5, %xmm2
611; SSSE3-NEXT:    por %xmm2, %xmm0
612; SSSE3-NEXT:    pshufb %xmm4, %xmm1
613; SSSE3-NEXT:    pshufb %xmm5, %xmm3
614; SSSE3-NEXT:    por %xmm3, %xmm1
615; SSSE3-NEXT:    retq
616;
617; SSE41-LABEL: constant_pblendvb_avx2:
618; SSE41:       # %bb.0: # %entry
619; SSE41-NEXT:    movdqa %xmm0, %xmm4
620; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
621; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
622; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
623; SSE41-NEXT:    movdqa %xmm4, %xmm0
624; SSE41-NEXT:    retq
625;
626; AVX1-LABEL: constant_pblendvb_avx2:
627; AVX1:       # %bb.0: # %entry
628; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
629; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
630; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
631; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
632; AVX1-NEXT:    retq
633;
634; AVX2-LABEL: constant_pblendvb_avx2:
635; AVX2:       # %bb.0: # %entry
636; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
637; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
638; AVX2-NEXT:    retq
639entry:
640  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
641  ret <32 x i8> %select
642}
643
644declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
645declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
646
647;; 4 tests for shufflevectors that optimize to blend + immediate
; shufflevector <0,5,2,7> is a lane-aligned two-input blend: must optimize to the
; same blendps-with-immediate as vsel_float.
648define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
649; SSE2-LABEL: blend_shufflevector_4xfloat:
650; SSE2:       # %bb.0: # %entry
651; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
652; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
653; SSE2-NEXT:    retq
654;
655; SSSE3-LABEL: blend_shufflevector_4xfloat:
656; SSSE3:       # %bb.0: # %entry
657; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
658; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
659; SSSE3-NEXT:    retq
660;
661; SSE41-LABEL: blend_shufflevector_4xfloat:
662; SSE41:       # %bb.0: # %entry
663; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
664; SSE41-NEXT:    retq
665;
666; AVX-LABEL: blend_shufflevector_4xfloat:
667; AVX:       # %bb.0: # %entry
668; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
669; AVX-NEXT:    retq
670entry:
671  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
672  ret <4 x float> %select
673}
674
; 256-bit shufflevector blend taking lanes 0 and 6 from %a, the rest from %b:
; one ymm vblendps on AVX, two per-half xmm blends on SSE4.1.
675define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
676; SSE2-LABEL: blend_shufflevector_8xfloat:
677; SSE2:       # %bb.0: # %entry
678; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
679; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
680; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
681; SSE2-NEXT:    movaps %xmm2, %xmm0
682; SSE2-NEXT:    movaps %xmm3, %xmm1
683; SSE2-NEXT:    retq
684;
685; SSSE3-LABEL: blend_shufflevector_8xfloat:
686; SSSE3:       # %bb.0: # %entry
687; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
688; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
689; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
690; SSSE3-NEXT:    movaps %xmm2, %xmm0
691; SSSE3-NEXT:    movaps %xmm3, %xmm1
692; SSSE3-NEXT:    retq
693;
694; SSE41-LABEL: blend_shufflevector_8xfloat:
695; SSE41:       # %bb.0: # %entry
696; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
697; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
698; SSE41-NEXT:    retq
699;
700; AVX-LABEL: blend_shufflevector_8xfloat:
701; AVX:       # %bb.0: # %entry
702; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
703; AVX-NEXT:    retq
704entry:
705  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
706  ret <8 x float> %select
707}
708
; shufflevector <0,5,2,3>: only element 1 comes from %b, and the whole upper half
; is %a — so SSE needs just one shuffle and AVX one ymm blend.
709define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
710; SSE2-LABEL: blend_shufflevector_4xdouble:
711; SSE2:       # %bb.0: # %entry
712; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
713; SSE2-NEXT:    retq
714;
715; SSSE3-LABEL: blend_shufflevector_4xdouble:
716; SSSE3:       # %bb.0: # %entry
717; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
718; SSSE3-NEXT:    retq
719;
720; SSE41-LABEL: blend_shufflevector_4xdouble:
721; SSE41:       # %bb.0: # %entry
722; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
723; SSE41-NEXT:    retq
724;
725; AVX-LABEL: blend_shufflevector_4xdouble:
726; AVX:       # %bb.0: # %entry
727; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
728; AVX-NEXT:    retq
729entry:
730  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
731  ret <4 x double> %select
732}
733
; shufflevector <4,1,6,7>: element 1 from %a, everything else from %b. The upper
; half is all-%b (plain register copy on SSE); the lower half is a movsd/blend.
734define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
735; SSE2-LABEL: blend_shufflevector_4xi64:
736; SSE2:       # %bb.0: # %entry
737; SSE2-NEXT:    movaps %xmm3, %xmm1
738; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
739; SSE2-NEXT:    retq
740;
741; SSSE3-LABEL: blend_shufflevector_4xi64:
742; SSSE3:       # %bb.0: # %entry
743; SSSE3-NEXT:    movaps %xmm3, %xmm1
744; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
745; SSSE3-NEXT:    retq
746;
747; SSE41-LABEL: blend_shufflevector_4xi64:
748; SSE41:       # %bb.0: # %entry
749; SSE41-NEXT:    movaps %xmm3, %xmm1
750; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
751; SSE41-NEXT:    retq
752;
753; AVX-LABEL: blend_shufflevector_4xi64:
754; AVX:       # %bb.0: # %entry
755; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
756; AVX-NEXT:    retq
757entry:
758  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
759  ret <4 x i64> %select
760}
761
; Hand-written blend: sign-bit mask (ashr 31) + xor/and/and/or selecting %a where
; %b is negative, else %c. Should be recognized as psrad + pblendvb (variable
; blend keyed on the sign bit). Note %sub is dead and must be eliminated.
762define <4 x i32> @blend_logic_v4i32(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) {
763; SSE2-LABEL: blend_logic_v4i32:
764; SSE2:       # %bb.0: # %entry
765; SSE2-NEXT:    psrad $31, %xmm0
766; SSE2-NEXT:    pand %xmm0, %xmm1
767; SSE2-NEXT:    pandn %xmm2, %xmm0
768; SSE2-NEXT:    por %xmm1, %xmm0
769; SSE2-NEXT:    retq
770;
771; SSSE3-LABEL: blend_logic_v4i32:
772; SSSE3:       # %bb.0: # %entry
773; SSSE3-NEXT:    psrad $31, %xmm0
774; SSSE3-NEXT:    pand %xmm0, %xmm1
775; SSSE3-NEXT:    pandn %xmm2, %xmm0
776; SSSE3-NEXT:    por %xmm1, %xmm0
777; SSSE3-NEXT:    retq
778;
779; SSE41-LABEL: blend_logic_v4i32:
780; SSE41:       # %bb.0: # %entry
781; SSE41-NEXT:    psrad $31, %xmm0
782; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
783; SSE41-NEXT:    movdqa %xmm2, %xmm0
784; SSE41-NEXT:    retq
785;
786; AVX-LABEL: blend_logic_v4i32:
787; AVX:       # %bb.0: # %entry
788; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
789; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
790; AVX-NEXT:    retq
791entry:
792  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
793  %sub = sub nsw <4 x i32> zeroinitializer, %a
794  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
795  %1 = and <4 x i32> %c, %0
796  %2 = and <4 x i32> %a, %b.lobit
797  %cond = or <4 x i32> %1, %2
798  ret <4 x i32> %cond
799}
800
; 256-bit version of blend_logic_v4i32 (select %a where %b < 0, else %c, via the
; sign-bit mask idiom; %sub is dead). AVX2 keeps it as one ymm vpsrad+vpblendvb;
; AVX1 splits the shift per half and blends with and/andn/or; SSE works per half.
801define <8 x i32> @blend_logic_v8i32(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) {
802; SSE2-LABEL: blend_logic_v8i32:
803; SSE2:       # %bb.0: # %entry
804; SSE2-NEXT:    psrad $31, %xmm0
805; SSE2-NEXT:    psrad $31, %xmm1
806; SSE2-NEXT:    pand %xmm1, %xmm3
807; SSE2-NEXT:    pandn %xmm5, %xmm1
808; SSE2-NEXT:    por %xmm3, %xmm1
809; SSE2-NEXT:    pand %xmm0, %xmm2
810; SSE2-NEXT:    pandn %xmm4, %xmm0
811; SSE2-NEXT:    por %xmm2, %xmm0
812; SSE2-NEXT:    retq
813;
814; SSSE3-LABEL: blend_logic_v8i32:
815; SSSE3:       # %bb.0: # %entry
816; SSSE3-NEXT:    psrad $31, %xmm0
817; SSSE3-NEXT:    psrad $31, %xmm1
818; SSSE3-NEXT:    pand %xmm1, %xmm3
819; SSSE3-NEXT:    pandn %xmm5, %xmm1
820; SSSE3-NEXT:    por %xmm3, %xmm1
821; SSSE3-NEXT:    pand %xmm0, %xmm2
822; SSSE3-NEXT:    pandn %xmm4, %xmm0
823; SSSE3-NEXT:    por %xmm2, %xmm0
824; SSSE3-NEXT:    retq
825;
826; SSE41-LABEL: blend_logic_v8i32:
827; SSE41:       # %bb.0: # %entry
828; SSE41-NEXT:    psrad $31, %xmm1
829; SSE41-NEXT:    psrad $31, %xmm0
830; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
831; SSE41-NEXT:    movdqa %xmm1, %xmm0
832; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm5
833; SSE41-NEXT:    movdqa %xmm4, %xmm0
834; SSE41-NEXT:    movdqa %xmm5, %xmm1
835; SSE41-NEXT:    retq
836;
837; AVX1-LABEL: blend_logic_v8i32:
838; AVX1:       # %bb.0: # %entry
839; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm3
840; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
841; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
842; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
843; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
844; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
845; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
846; AVX1-NEXT:    retq
847;
848; AVX2-LABEL: blend_logic_v8i32:
849; AVX2:       # %bb.0: # %entry
850; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
851; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
852; AVX2-NEXT:    retq
853entry:
854  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
855  %sub = sub nsw <8 x i32> zeroinitializer, %a
856  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
857  %1 = and <8 x i32> %c, %0
858  %2 = and <8 x i32> %a, %b.lobit
859  %cond = or <8 x i32> %1, %2
860  ret <8 x i32> %cond
861}
862
; Conditional negate: (%a & ~mask) | (-%a & mask), with mask = sign
; splat of %b — i.e. lanes where %b < 0 return -%a, others return %a.
; Expected lowering collapses the blend into the classic xor/sub
; sign-flip: m = psrad $31 on %b, then (%a ^ m) - m, which equals %a
; when m == 0 and -%a when m == all-ones. CHECK lines are autogenerated;
; regenerate with update_llc_test_checks.py rather than editing by hand.
define <4 x i32> @blend_neg_logic_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %a, %0
  %2 = and <4 x i32> %b.lobit, %sub
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}
886
; 256-bit variant of blend_neg_logic_v4i32: negate the lanes of %a
; whose corresponding %b lane is negative. Expected lowering:
;   SSE: the xor/sub conditional-negate trick applied per 128-bit half
;   AVX1: per the CHECK lines below, the sign mask is rebuilt as a ymm
;     but 0 - %a is materialized explicitly per half and recombined,
;     then blended with vandnps/vandps/vorps (no ymm-wide pxor/psubd
;     on AVX1 integer ops)
;   AVX2: single ymm-wide vpxor + vpsubd against the vpsrad mask
; CHECK lines are autogenerated; regenerate, don't hand-edit.
define <8 x i32> @blend_neg_logic_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm1
; SSE-NEXT:    psubd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: blend_neg_logic_v8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_neg_logic_v8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %a, %0
  %2 = and <8 x i32> %b.lobit, %sub
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}
929
; Same conditional negate expressed with an explicit select: the ashr-31
; mask is truncated to <4 x i1> (its low bit equals the sign bit), and
; the select keeps %v where %c < 0 and takes -%v elsewhere — the inverse
; polarity of blend_neg_logic_v4i32. Expected lowering: m = psrad $31,
; then m - (%v ^ m), which yields %v when m == all-ones and -%v when
; m == 0 (hence the subtraction direction differs from the _v4i32 test).
; CHECK lines are autogenerated (update_llc_test_checks.py).
define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE-LABEL: blend_neg_logic_v4i32_2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %1 = trunc <4 x i32> %0 to <4 x i1>
  %2 = sub nsw <4 x i32> zeroinitializer, %v
  %3 = select <4 x i1> %1, <4 x i32> %v, <4 x i32> %2
  ret <4 x i32> %3
}
952