; Source: /llvm-project/llvm/test/CodeGen/X86/vselect-zero.ll (revision 8fa1e5771bbd080c8a2a11c0579a3082cedbf94a)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2   | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx    | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQBW

; PR28925

; %cond ? 0 : %x — the zero true arm lets the select lower to a single
; andn of the sign-extended mask (or an inverted AVX512 predicate mask).
define <4 x i32> @test1(<4 x i1> %cond, <4 x i32> %x) {
; SSE-LABEL: test1:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; AVX512F-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} {z}
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: test1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQBW-NEXT:    vpmovd2m %xmm0, %k0
; AVX512DQBW-NEXT:    knotw %k0, %k1
; AVX512DQBW-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} {z}
; AVX512DQBW-NEXT:    retq
  %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x
  ret <4 x i32> %r
}

; Same zero-true-arm select, but fed by an fcmp: the oeq predicate is
; inverted (cmpneqps) so the select becomes compare + and.
define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %x) {
; SSE-LABEL: test2:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovdqa32 %xmm2, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %cond = fcmp oeq <4 x float> %a, %b
  %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x
  ret <4 x i32> %r
}

; Scalar select with a zero false arm: cond ? %x : 0 lowers to compare + and.
define float @fsel_zero_false_val(float %a, float %b, float %x) {
; SSE-LABEL: fsel_zero_false_val:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqss %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsel_zero_false_val:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: fsel_zero_false_val:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovss %xmm2, %xmm2, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %cond = fcmp oeq float %a, %b
  %r = select i1 %cond, float %x, float 0.0
  ret float %r
}

; Scalar select with a zero true arm: cond ? 0 : %x lowers to compare + andn.
define float @fsel_zero_true_val(float %a, float %b, float %x) {
; SSE-LABEL: fsel_zero_true_val:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqss %xmm1, %xmm0
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsel_zero_true_val:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vandnps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: fsel_zero_true_val:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
; AVX512-NEXT:    vmovaps %xmm2, %xmm0
; AVX512-NEXT:    retq
  %cond = fcmp oeq float %a, %b
  %r = select i1 %cond, float 0.0, float %x
  ret float %r
}

; A non-zero false constant (42.0) cannot be produced by a plain mask op,
; so lowering needs a blend or a masked move from a constant.
define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
; SSE-LABEL: fsel_nonzero_false_val:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqsd %xmm1, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm2
; SSE-NEXT:    movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; SSE-NEXT:    andnpd %xmm1, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsel_nonzero_false_val:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: fsel_nonzero_false_val:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX512-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
; AVX512-NEXT:    retq
  %cond = fcmp oeq double %x, %y
  %r = select i1 %cond, double %z, double 42.0
  ret double %r
}

; Non-zero TRUE constant; same blend requirement with the operands swapped.
define double @fsel_nonzero_true_val(double %x, double %y, double %z) {
; SSE-LABEL: fsel_nonzero_true_val:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqsd %xmm1, %xmm0
; SSE-NEXT:    movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; SSE-NEXT:    andpd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsel_nonzero_true_val:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: fsel_nonzero_true_val:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovsd {{.*#+}} xmm2 {%k1} = [4.2E+1,0.0E+0]
; AVX512-NEXT:    vmovapd %xmm2, %xmm0
; AVX512-NEXT:    retq
  %cond = fcmp oeq double %x, %y
  %r = select i1 %cond, double 42.0, double %z
  ret double %r
}

; Both select arms are non-zero constants.
define double @fsel_nonzero_constants(double %x, double %y) {
; SSE-LABEL: fsel_nonzero_constants:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqsd %xmm1, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    andl $1, %eax
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: fsel_nonzero_constants:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: fsel_nonzero_constants:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 {%k1} = [1.2E+1,0.0E+0]
; AVX512-NEXT:    retq
  %cond = fcmp oeq double %x, %y
  %r = select i1 %cond, double 12.0, double 42.0
  ret double %r
}

; Vector select between two constant vectors (one lane of each is non-zero).
define <2 x double> @vsel_nonzero_constants(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: vsel_nonzero_constants:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cmplepd %xmm0, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = [4.2E+1,0.0E+0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    andnpd %xmm2, %xmm0
; SSE2-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    orpd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: vsel_nonzero_constants:
; SSE42:       # %bb.0:
; SSE42-NEXT:    cmplepd %xmm0, %xmm1
; SSE42-NEXT:    movsd {{.*#+}} xmm2 = [4.2E+1,0.0E+0]
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE42-NEXT:    movapd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: vsel_nonzero_constants:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; AVX-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: vsel_nonzero_constants:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k1
; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX512-NEXT:    vmovapd {{.*#+}} xmm0 {%k1} = [1.2E+1,-1.0E+0]
; AVX512-NEXT:    retq
  %cond = fcmp oge <2 x double> %x, %y
  %r = select <2 x i1> %cond, <2 x double> <double 12.0, double -1.0>, <2 x double> <double 42.0, double 0.0>
  ret <2 x double> %r
}

; (a < 0) ? b : 0 — the sign-bit compare becomes an arithmetic shift or
; pcmpgt-vs-zero, and the select folds to a single and.
define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: signbit_mask_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: signbit_mask_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
  ret <16 x i8> %r
}

; i16 elements have psraw, so no compare is needed at all.
define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: signbit_mask_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: signbit_mask_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
  ret <8 x i16> %r
}

define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: signbit_mask_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: signbit_mask_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; i64: SSE2 has no 64-bit arithmetic shift, so it splats the high dword's
; sign; SSE4.2 uses pcmpgtq; AVX512 has vpsraq.
define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: signbit_mask_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: signbit_mask_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm2
; SSE42-NEXT:    pand %xmm1, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: signbit_mask_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <2 x i64> %a, zeroinitializer
  %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer
  ret <2 x i64> %r
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

define <2 x i64> @signbit_mask_swap_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: signbit_mask_swap_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: signbit_mask_swap_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm2
; SSE42-NEXT:    pand %xmm1, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: signbit_mask_swap_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_swap_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
  %r = select <2 x i1> %cond, <2 x i64> zeroinitializer, <2 x i64> %b
  ret <2 x i64> %r
}

; 256-bit sign-bit mask: splits into two 128-bit ops pre-AVX2.
define <32 x i8> @signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: signbit_mask_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm4, %xmm4
; SSE-NEXT:    pxor %xmm5, %xmm5
; SSE-NEXT:    pcmpgtb %xmm0, %xmm5
; SSE-NEXT:    pand %xmm2, %xmm5
; SSE-NEXT:    pcmpgtb %xmm1, %xmm4
; SSE-NEXT:    pand %xmm3, %xmm4
; SSE-NEXT:    movdqa %xmm5, %xmm0
; SSE-NEXT:    movdqa %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: signbit_mask_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_mask_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <32 x i8> %a, zeroinitializer
  %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer
  ret <32 x i8> %r
}

; 256-bit (a < 0) ? b : 0 via arithmetic shift + and.
define <16 x i16> @signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: signbit_mask_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: signbit_mask_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_mask_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer
  ret <16 x i16> %r
}

define <8 x i32> @signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: signbit_mask_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: signbit_mask_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_mask_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

define <8 x i32> @signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: signbit_mask_swap_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: signbit_mask_swap_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_mask_swap_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_swap_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b
  ret <8 x i32> %r
}

; 256-bit i64 sign-bit mask; per-target shift/compare choices as in v2i64.
define <4 x i64> @signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: signbit_mask_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: signbit_mask_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm4, %xmm4
; SSE42-NEXT:    pxor %xmm5, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
; SSE42-NEXT:    pand %xmm2, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm4
; SSE42-NEXT:    pand %xmm3, %xmm4
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    movdqa %xmm4, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: signbit_mask_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_mask_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_mask_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer
  ret <4 x i64> %r
}

; (a < 0) ? -1 : b — the all-ones true arm folds the select to mask | b.
define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: signbit_setmask_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE-NEXT:    por %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: signbit_setmask_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b
  ret <16 x i8> %r
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

define <16 x i8> @signbit_setmask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: signbit_setmask_swap_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE-NEXT:    por %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: signbit_setmask_swap_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_swap_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %r
}

define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: signbit_setmask_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: signbit_setmask_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b
  ret <8 x i16> %r
}

; (a < 0) ? -1 : b for v4i32/v2i64/v32i8 — sign mask or'ed with b.
define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: signbit_setmask_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: signbit_setmask_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b
  ret <4 x i32> %r
}

define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: signbit_setmask_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: signbit_setmask_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm2
; SSE42-NEXT:    por %xmm1, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: signbit_setmask_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <2 x i64> %a, zeroinitializer
  %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b
  ret <2 x i64> %r
}

define <32 x i8> @signbit_setmask_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: signbit_setmask_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm4, %xmm4
; SSE-NEXT:    pxor %xmm5, %xmm5
; SSE-NEXT:    pcmpgtb %xmm0, %xmm5
; SSE-NEXT:    por %xmm2, %xmm5
; SSE-NEXT:    pcmpgtb %xmm1, %xmm4
; SSE-NEXT:    por %xmm3, %xmm4
; SSE-NEXT:    movdqa %xmm5, %xmm0
; SSE-NEXT:    movdqa %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: signbit_setmask_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_setmask_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <32 x i8> %a, zeroinitializer
  %r = select <32 x i1> %cond, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> %b
  ret <32 x i8> %r
}

; 256-bit (a < 0) ? -1 : b — arithmetic shift (or compare) then or.
define <16 x i16> @signbit_setmask_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: signbit_setmask_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    por %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: signbit_setmask_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_setmask_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %cond, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <16 x i16> %b
  ret <16 x i16> %r
}

define <8 x i32> @signbit_setmask_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: signbit_setmask_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    por %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: signbit_setmask_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_setmask_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> %b
  ret <8 x i32> %r
}

define <4 x i64> @signbit_setmask_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: signbit_setmask_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: signbit_setmask_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm4, %xmm4
; SSE42-NEXT:    pxor %xmm5, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
; SSE42-NEXT:    por %xmm2, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm4
; SSE42-NEXT:    por %xmm3, %xmm4
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    movdqa %xmm4, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: signbit_setmask_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_setmask_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %cond, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %b
  ret <4 x i64> %r
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

define <4 x i64> @signbit_setmask_swap_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: signbit_setmask_swap_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: signbit_setmask_swap_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm4, %xmm4
; SSE42-NEXT:    pxor %xmm5, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
; SSE42-NEXT:    por %xmm2, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm4
; SSE42-NEXT:    por %xmm3, %xmm4
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    movdqa %xmm4, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: signbit_setmask_swap_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_setmask_swap_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_setmask_swap_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %r
}

; (a > -1) ? b : 0 selects %b where the sign bit of %a is clear. There is no
; vector ashr for i8, so this should lower to pcmpgtb against all-ones + and.
define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: not_signbit_mask_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
  ret <16 x i8> %r
}
944
; (a > -1) ? b : 0 for i16 elements: expect sign-bit splat via psraw $15
; followed by andn (mask is inverted relative to the select condition).
define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: not_signbit_mask_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
  ret <8 x i16> %r
}
967
968; Swap cmp pred and select ops. This is logically equivalent to the above test.
969
; (a < 0) ? 0 : b is logically the same as (a > -1) ? b : 0, so codegen must
; match not_signbit_mask_v8i16: psraw $15 + pandn on every subtarget.
define <8 x i16> @not_signbit_mask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: not_signbit_mask_swap_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_swap_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_swap_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp slt <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i16> zeroinitializer, <8 x i16> %b
  ret <8 x i16> %r
}
992
; (a > -1) ? b : 0 for i32 elements: expect psrad $31 + pandn.
define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: not_signbit_mask_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}
1015
; (a > -1) ? b : 0 for i64 elements. SSE2 has no 64-bit compare/shift, so it
; splats the high dwords (pshufd) and uses pcmpgtd; SSE4.2/AVX use pcmpgtq
; against all-ones; AVX512 has a true 64-bit ashr (vpsraq $63) + andn.
define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: not_signbit_mask_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: not_signbit_mask_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pand %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
  %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer
  ret <2 x i64> %r
}
1048
; 256-bit variant of not_signbit_mask_v16i8. SSE splits into two 128-bit
; halves; AVX1 (no 256-bit integer ops) compares per-half then combines with a
; float-domain andnps; AVX2/AVX512 use a single 256-bit pcmpgtb + pand.
define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: not_signbit_mask_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm4, %xmm1
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <32 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer
  ret <32 x i8> %r
}
1086
; 256-bit variant of not_signbit_mask_v8i16: psraw $15 + andn, split into two
; 128-bit halves on SSE/AVX1, a single 256-bit op on AVX2/AVX512.
define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: not_signbit_mask_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <16 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer
  ret <16 x i16> %r
}
1120
; 256-bit variant of not_signbit_mask_v4i32: psrad $31 + andn, split into two
; 128-bit halves on SSE/AVX1, a single 256-bit op on AVX2/AVX512.
define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: not_signbit_mask_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}
1154
1155; Swap cmp pred and select ops. This is logically equivalent to the above test.
1156
; (a < 0) ? 0 : b is logically the same as (a > -1) ? b : 0, so codegen must
; match not_signbit_mask_v8i32: psrad $31 + andn on every subtarget.
define <8 x i32> @not_signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: not_signbit_mask_swap_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_swap_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_swap_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_swap_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp slt <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b
  ret <8 x i32> %r
}
1190
; 256-bit variant of not_signbit_mask_v2i64. SSE2 splats the high dwords
; (pshufd) and uses pcmpgtd per half; SSE4.2 uses pcmpgtq per half; AVX1
; compares halves against zero then combines with andnps; AVX2 uses one
; 256-bit pcmpgtq; AVX512 uses the native 64-bit ashr (vpsraq $63) + andn.
define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: not_signbit_mask_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: not_signbit_mask_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    pand %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm1
; SSE42-NEXT:    pand %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer
  ret <4 x i64> %r
}
1238