; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2                                   | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2                                 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx                                    | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2                                   | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl                      | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw  | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512DQBW

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701
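;
; A sketch of the fold being tested: for N-bit lanes,
;   xor (ashr X, N-1), -1  ==  sext (icmp sgt X, -1)
; so 'isPositive' can be emitted as a single pcmpgt against an all-ones
; vector (materialized cheaply with pcmpeqd of a register against itself)
; instead of an arithmetic shift followed by a NOT.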

define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}

define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}

define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}

define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}

define <1 x i128> @test_strange_type(<1 x i128> %x) {
; CHECK-LABEL: test_strange_type:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    notq %rax
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    retq
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}

define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtb_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}

define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtw_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}

define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtd_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}

define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtq_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}

define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpeq_zext_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}

define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpeq_zext_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}

define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}

define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpeq_zext_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}

define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpgt_zext_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}

define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}

define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpgt_zext_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}

define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}

; Test that we optimize a zext of a vector setcc ne zero where all bits but the
; lsb are known to be zero.
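; Each lane of the shifted value is already 0 or 1, so (zext (icmp ne %a, 0))
; is just a widening of %a: the compare should fold away, leaving only the
; shift and the lane-width extension.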
define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $15, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    psrlw $15, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE42-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
  %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %b = icmp ne <8 x i16> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $31, %xmm1
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrld $31, %xmm1
; SSE42-NEXT:    psrld $31, %xmm0
; SSE42-NEXT:    packusdw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i16>
  ret <8 x i16> %c
}

; PR26697
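; Masking the sign bit, testing it for nonzero, and sign-extending the i1 is
; equivalent to an arithmetic shift right by 31, so this should lower to a
; single psrad/vpsrad.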
623define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) {
624; SSE-LABEL: cmpeq_one_mask_bit:
625; SSE:       # %bb.0:
626; SSE-NEXT:    psrad $31, %xmm0
627; SSE-NEXT:    retq
628;
629; AVX-LABEL: cmpeq_one_mask_bit:
630; AVX:       # %bb.0:
631; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
632; AVX-NEXT:    retq
633  %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
634  %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
635  %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32>
636  ret <4 x i32> %mask_bool_ext
637}
638
639define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %x, <2 x i64> %y) {
640; SSE2-LABEL: not_signbit_mask_v2i64:
641; SSE2:       # %bb.0:
642; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
643; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
644; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
645; SSE2-NEXT:    pand %xmm1, %xmm0
646; SSE2-NEXT:    retq
647;
648; SSE42-LABEL: not_signbit_mask_v2i64:
649; SSE42:       # %bb.0:
650; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
651; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
652; SSE42-NEXT:    pand %xmm1, %xmm0
653; SSE42-NEXT:    retq
654;
655; AVX1-LABEL: not_signbit_mask_v2i64:
656; AVX1:       # %bb.0:
657; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
658; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
659; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
660; AVX1-NEXT:    retq
661;
662; AVX2-LABEL: not_signbit_mask_v2i64:
663; AVX2:       # %bb.0:
664; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
665; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
666; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
667; AVX2-NEXT:    retq
668;
669; AVX512-LABEL: not_signbit_mask_v2i64:
670; AVX512:       # %bb.0:
671; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
672; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
673; AVX512-NEXT:    retq
674  %sh = ashr <2 x i64> %x, <i64 63, i64 63>
675  %not = xor <2 x i64> %sh, <i64 -1, i64 -1>
676  %and = and <2 x i64> %y, %not
677  ret <2 x i64> %and
678}
679
680define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %x, <4 x i32> %y) {
681; SSE-LABEL: not_signbit_mask_v4i32:
682; SSE:       # %bb.0:
683; SSE-NEXT:    psrad $31, %xmm0
684; SSE-NEXT:    pandn %xmm1, %xmm0
685; SSE-NEXT:    retq
686;
687; AVX-LABEL: not_signbit_mask_v4i32:
688; AVX:       # %bb.0:
689; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
690; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
691; AVX-NEXT:    retq
692  %sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
693  %not = xor <4 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1>
694  %and = and <4 x i32> %not, %y
695  ret <4 x i32> %and
696}
697
698define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %x, <8 x i16> %y) {
699; SSE-LABEL: not_signbit_mask_v8i16:
700; SSE:       # %bb.0:
701; SSE-NEXT:    psraw $15, %xmm0
702; SSE-NEXT:    pandn %xmm1, %xmm0
703; SSE-NEXT:    retq
704;
705; AVX-LABEL: not_signbit_mask_v8i16:
706; AVX:       # %bb.0:
707; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
708; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
709; AVX-NEXT:    retq
710  %sh = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
711  %not = xor <8 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
712  %and = and <8 x i16> %y, %not
713  ret <8 x i16> %and
714}
715
716define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %x, <16 x i8> %y) {
717; SSE-LABEL: not_signbit_mask_v16i8:
718; SSE:       # %bb.0:
719; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
720; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
721; SSE-NEXT:    pand %xmm1, %xmm0
722; SSE-NEXT:    retq
723;
724; AVX-LABEL: not_signbit_mask_v16i8:
725; AVX:       # %bb.0:
726; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
727; AVX-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
728; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
729; AVX-NEXT:    retq
730  %sh = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
731  %not = xor <16 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
732  %and = and <16 x i8> %not, %y
733  ret <16 x i8> %and
734}
735
736define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %x, <4 x i64> %y) {
737; SSE2-LABEL: not_signbit_mask_v4i64:
738; SSE2:       # %bb.0:
739; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
740; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
741; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
742; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
743; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
744; SSE2-NEXT:    pand %xmm2, %xmm0
745; SSE2-NEXT:    pand %xmm3, %xmm1
746; SSE2-NEXT:    retq
747;
748; SSE42-LABEL: not_signbit_mask_v4i64:
749; SSE42:       # %bb.0:
750; SSE42-NEXT:    pcmpeqd %xmm4, %xmm4
751; SSE42-NEXT:    pcmpgtq %xmm4, %xmm1
752; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
753; SSE42-NEXT:    pand %xmm2, %xmm0
754; SSE42-NEXT:    pand %xmm3, %xmm1
755; SSE42-NEXT:    retq
756;
757; AVX1-LABEL: not_signbit_mask_v4i64:
758; AVX1:       # %bb.0:
759; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
760; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
761; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
762; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
763; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
764; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
765; AVX1-NEXT:    retq
766;
767; AVX2-LABEL: not_signbit_mask_v4i64:
768; AVX2:       # %bb.0:
769; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
770; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
771; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
772; AVX2-NEXT:    retq
773;
774; AVX512-LABEL: not_signbit_mask_v4i64:
775; AVX512:       # %bb.0:
776; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
777; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
778; AVX512-NEXT:    retq
779  %sh = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
780  %not = xor <4 x i64> %sh, <i64 -1, i64 -1, i64 -1, i64 -1>
781  %and = and <4 x i64> %y, %not
782  ret <4 x i64> %and
783}
784
785define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
786; SSE-LABEL: not_signbit_mask_v8i32:
787; SSE:       # %bb.0:
788; SSE-NEXT:    psrad $31, %xmm0
789; SSE-NEXT:    pandn %xmm2, %xmm0
790; SSE-NEXT:    psrad $31, %xmm1
791; SSE-NEXT:    pandn %xmm3, %xmm1
792; SSE-NEXT:    retq
793;
794; AVX1-LABEL: not_signbit_mask_v8i32:
795; AVX1:       # %bb.0:
796; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
797; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
798; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
799; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
800; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
801; AVX1-NEXT:    retq
802;
803; AVX2-LABEL: not_signbit_mask_v8i32:
804; AVX2:       # %bb.0:
805; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
806; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
807; AVX2-NEXT:    retq
808;
809; AVX512-LABEL: not_signbit_mask_v8i32:
810; AVX512:       # %bb.0:
811; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
812; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
813; AVX512-NEXT:    retq
814  %sh = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
815  %not = xor <8 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
816  %and = and <8 x i32> %not, %y
817  ret <8 x i32> %and
818}
819
820define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %x, <16 x i16> %y) {
821; SSE-LABEL: not_signbit_mask_v16i16:
822; SSE:       # %bb.0:
823; SSE-NEXT:    psraw $15, %xmm0
824; SSE-NEXT:    pandn %xmm2, %xmm0
825; SSE-NEXT:    psraw $15, %xmm1
826; SSE-NEXT:    pandn %xmm3, %xmm1
827; SSE-NEXT:    retq
828;
829; AVX1-LABEL: not_signbit_mask_v16i16:
830; AVX1:       # %bb.0:
831; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
832; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
833; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
834; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
835; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
836; AVX1-NEXT:    retq
837;
838; AVX2-LABEL: not_signbit_mask_v16i16:
839; AVX2:       # %bb.0:
840; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
841; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
842; AVX2-NEXT:    retq
843;
844; AVX512-LABEL: not_signbit_mask_v16i16:
845; AVX512:       # %bb.0:
846; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
847; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
848; AVX512-NEXT:    retq
849  %sh = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
850  %not = xor <16 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
851  %and = and <16 x i16> %y, %not
852  ret <16 x i16> %and
853}
854
855define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %x, <32 x i8> %y) {
856; SSE-LABEL: not_signbit_mask_v32i8:
857; SSE:       # %bb.0:
858; SSE-NEXT:    pcmpeqd %xmm4, %xmm4
859; SSE-NEXT:    pcmpgtb %xmm4, %xmm1
860; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
861; SSE-NEXT:    pand %xmm2, %xmm0
862; SSE-NEXT:    pand %xmm3, %xmm1
863; SSE-NEXT:    retq
864;
865; AVX1-LABEL: not_signbit_mask_v32i8:
866; AVX1:       # %bb.0:
867; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
868; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
869; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
870; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm3, %xmm0
871; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
872; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
873; AVX1-NEXT:    retq
874;
875; AVX2-LABEL: not_signbit_mask_v32i8:
876; AVX2:       # %bb.0:
877; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
878; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
879; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
880; AVX2-NEXT:    retq
881;
882; AVX512-LABEL: not_signbit_mask_v32i8:
883; AVX512:       # %bb.0:
884; AVX512-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
885; AVX512-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
886; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
887; AVX512-NEXT:    retq
888  %sh = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
889  %not = xor <32 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
890  %and = and <32 x i8> %not, %y
891  ret <32 x i8> %and
892}
893
894define <2 x i64> @ispositive_mask_v2i64(<2 x i64> %x, <2 x i64> %y) {
895; SSE2-LABEL: ispositive_mask_v2i64:
896; SSE2:       # %bb.0:
897; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
898; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
899; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
900; SSE2-NEXT:    pand %xmm1, %xmm0
901; SSE2-NEXT:    retq
902;
903; SSE42-LABEL: ispositive_mask_v2i64:
904; SSE42:       # %bb.0:
905; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
906; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
907; SSE42-NEXT:    pand %xmm1, %xmm0
908; SSE42-NEXT:    retq
909;
910; AVX1-LABEL: ispositive_mask_v2i64:
911; AVX1:       # %bb.0:
912; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
913; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
914; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
915; AVX1-NEXT:    retq
916;
917; AVX2-LABEL: ispositive_mask_v2i64:
918; AVX2:       # %bb.0:
919; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
920; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
921; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
922; AVX2-NEXT:    retq
923;
924; AVX512-LABEL: ispositive_mask_v2i64:
925; AVX512:       # %bb.0:
926; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
927; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
928; AVX512-NEXT:    retq
929  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
930  %mask = sext <2 x i1> %cmp to <2 x i64>
931  %and = and <2 x i64> %mask, %y
932  ret <2 x i64> %and
933}
934
935define <4 x i32> @is_positive_mask_v4i32(<4 x i32> %x, <4 x i32> %y) {
936; SSE-LABEL: is_positive_mask_v4i32:
937; SSE:       # %bb.0:
938; SSE-NEXT:    psrad $31, %xmm0
939; SSE-NEXT:    pandn %xmm1, %xmm0
940; SSE-NEXT:    retq
941;
942; AVX-LABEL: is_positive_mask_v4i32:
943; AVX:       # %bb.0:
944; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
945; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
946; AVX-NEXT:    retq
947  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
948  %mask = sext <4 x i1> %cmp to <4 x i32>
949  %and = and <4 x i32> %y, %mask
950  ret <4 x i32> %and
951}
952
953define <8 x i16> @is_positive_mask_v8i16(<8 x i16> %x, <8 x i16> %y) {
954; SSE-LABEL: is_positive_mask_v8i16:
955; SSE:       # %bb.0:
956; SSE-NEXT:    psraw $15, %xmm0
957; SSE-NEXT:    pandn %xmm1, %xmm0
958; SSE-NEXT:    retq
959;
960; AVX-LABEL: is_positive_mask_v8i16:
961; AVX:       # %bb.0:
962; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
963; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
964; AVX-NEXT:    retq
965  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
966  %mask = sext <8 x i1> %cmp to <8 x i16>
967  %and = and <8 x i16> %mask, %y
968  ret <8 x i16> %and
969}
970
971define <16 x i8> @is_positive_mask_v16i8(<16 x i8> %x, <16 x i8> %y) {
972; SSE-LABEL: is_positive_mask_v16i8:
973; SSE:       # %bb.0:
974; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
975; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
976; SSE-NEXT:    pand %xmm1, %xmm0
977; SSE-NEXT:    retq
978;
979; AVX-LABEL: is_positive_mask_v16i8:
980; AVX:       # %bb.0:
981; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
982; AVX-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
983; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
984; AVX-NEXT:    retq
985  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
986  %mask = sext <16 x i1> %cmp to <16 x i8>
987  %and = and <16 x i8> %y, %mask
988  ret <16 x i8> %and
989}
990
991define <4 x i64> @is_positive_mask_v4i64(<4 x i64> %x, <4 x i64> %y) {
992; SSE2-LABEL: is_positive_mask_v4i64:
993; SSE2:       # %bb.0:
994; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
995; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
996; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
997; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
998; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
999; SSE2-NEXT:    pand %xmm2, %xmm0
1000; SSE2-NEXT:    pand %xmm3, %xmm1
1001; SSE2-NEXT:    retq
1002;
1003; SSE42-LABEL: is_positive_mask_v4i64:
1004; SSE42:       # %bb.0:
1005; SSE42-NEXT:    pcmpeqd %xmm4, %xmm4
1006; SSE42-NEXT:    pcmpgtq %xmm4, %xmm1
1007; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
1008; SSE42-NEXT:    pand %xmm2, %xmm0
1009; SSE42-NEXT:    pand %xmm3, %xmm1
1010; SSE42-NEXT:    retq
1011;
1012; AVX1-LABEL: is_positive_mask_v4i64:
1013; AVX1:       # %bb.0:
1014; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1015; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
1016; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
1017; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm0
1018; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1019; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
1020; AVX1-NEXT:    retq
1021;
1022; AVX2-LABEL: is_positive_mask_v4i64:
1023; AVX2:       # %bb.0:
1024; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1025; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
1026; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
1027; AVX2-NEXT:    retq
1028;
1029; AVX512-LABEL: is_positive_mask_v4i64:
1030; AVX512:       # %bb.0:
1031; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
1032; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
1033; AVX512-NEXT:    retq
1034  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
1035  %mask = sext <4 x i1> %cmp to <4 x i64>
1036  %and = and <4 x i64> %mask, %y
1037  ret <4 x i64> %and
1038}
1039
1040define <8 x i32> @is_positive_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
1041; SSE-LABEL: is_positive_mask_v8i32:
1042; SSE:       # %bb.0:
1043; SSE-NEXT:    psrad $31, %xmm0
1044; SSE-NEXT:    pandn %xmm2, %xmm0
1045; SSE-NEXT:    psrad $31, %xmm1
1046; SSE-NEXT:    pandn %xmm3, %xmm1
1047; SSE-NEXT:    retq
1048;
1049; AVX1-LABEL: is_positive_mask_v8i32:
1050; AVX1:       # %bb.0:
1051; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1052; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
1053; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
1054; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm0, %xmm0
1055; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1056; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
1057; AVX1-NEXT:    retq
1058;
1059; AVX2-LABEL: is_positive_mask_v8i32:
1060; AVX2:       # %bb.0:
1061; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1062; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
1063; AVX2-NEXT:    retq
1064;
1065; AVX512-LABEL: is_positive_mask_v8i32:
1066; AVX512:       # %bb.0:
1067; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
1068; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
1069; AVX512-NEXT:    retq
1070  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
1071  %mask = sext <8 x i1> %cmp to <8 x i32>
1072  %and = and <8 x i32> %y, %mask
1073  ret <8 x i32> %and
1074}
1075
1076define <16 x i16> @is_positive_mask_v16i16(<16 x i16> %x, <16 x i16> %y) {
1077; SSE-LABEL: is_positive_mask_v16i16:
1078; SSE:       # %bb.0:
1079; SSE-NEXT:    psraw $15, %xmm0
1080; SSE-NEXT:    pandn %xmm2, %xmm0
1081; SSE-NEXT:    psraw $15, %xmm1
1082; SSE-NEXT:    pandn %xmm3, %xmm1
1083; SSE-NEXT:    retq
1084;
1085; AVX1-LABEL: is_positive_mask_v16i16:
1086; AVX1:       # %bb.0:
1087; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1088; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
1089; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
1090; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm0, %xmm0
1091; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1092; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
1093; AVX1-NEXT:    retq
1094;
1095; AVX2-LABEL: is_positive_mask_v16i16:
1096; AVX2:       # %bb.0:
1097; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
1098; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
1099; AVX2-NEXT:    retq
1100;
1101; AVX512-LABEL: is_positive_mask_v16i16:
1102; AVX512:       # %bb.0:
1103; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
1104; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
1105; AVX512-NEXT:    retq
1106  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1107  %mask = sext <16 x i1> %cmp to <16 x i16>
1108  %and = and <16 x i16> %mask, %y
1109  ret <16 x i16> %and
1110}
1111
1112define <32 x i8> @is_positive_mask_v32i8(<32 x i8> %x, <32 x i8> %y) {
1113; SSE-LABEL: is_positive_mask_v32i8:
1114; SSE:       # %bb.0:
1115; SSE-NEXT:    pcmpeqd %xmm4, %xmm4
1116; SSE-NEXT:    pcmpgtb %xmm4, %xmm1
1117; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
1118; SSE-NEXT:    pand %xmm2, %xmm0
1119; SSE-NEXT:    pand %xmm3, %xmm1
1120; SSE-NEXT:    retq
1121;
1122; AVX1-LABEL: is_positive_mask_v32i8:
1123; AVX1:       # %bb.0:
1124; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1125; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
1126; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
1127; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm0, %xmm0
1128; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1129; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
1130; AVX1-NEXT:    retq
1131;
1132; AVX2-LABEL: is_positive_mask_v32i8:
1133; AVX2:       # %bb.0:
1134; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1135; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
1136; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
1137; AVX2-NEXT:    retq
1138;
1139; AVX512-LABEL: is_positive_mask_v32i8:
1140; AVX512:       # %bb.0:
1141; AVX512-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1142; AVX512-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
1143; AVX512-NEXT:    vpand %ymm0, %ymm1, %ymm0
1144; AVX512-NEXT:    retq
1145  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1146  %mask = sext <32 x i1> %cmp to <32 x i8>
1147  %and = and <32 x i8> %y, %mask
1148  ret <32 x i8> %and
1149}
1150
1151define <2 x i64> @ispositive_mask_load_v2i64(<2 x i64> %x, ptr %p) {
1152; SSE2-LABEL: ispositive_mask_load_v2i64:
1153; SSE2:       # %bb.0:
1154; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1155; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1156; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
1157; SSE2-NEXT:    pand (%rdi), %xmm0
1158; SSE2-NEXT:    retq
1159;
1160; SSE42-LABEL: ispositive_mask_load_v2i64:
1161; SSE42:       # %bb.0:
1162; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
1163; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
1164; SSE42-NEXT:    pand (%rdi), %xmm0
1165; SSE42-NEXT:    retq
1166;
1167; AVX1-LABEL: ispositive_mask_load_v2i64:
1168; AVX1:       # %bb.0:
1169; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1170; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
1171; AVX1-NEXT:    vpand (%rdi), %xmm0, %xmm0
1172; AVX1-NEXT:    retq
1173;
1174; AVX2-LABEL: ispositive_mask_load_v2i64:
1175; AVX2:       # %bb.0:
1176; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1177; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
1178; AVX2-NEXT:    vpand (%rdi), %xmm0, %xmm0
1179; AVX2-NEXT:    retq
1180;
1181; AVX512-LABEL: ispositive_mask_load_v2i64:
1182; AVX512:       # %bb.0:
1183; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
1184; AVX512-NEXT:    vpandn (%rdi), %xmm0, %xmm0
1185; AVX512-NEXT:    retq
1186  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
1187  %mask = sext <2 x i1> %cmp to <2 x i64>
1188  %y = load <2 x i64>, ptr %p
1189  %and = and <2 x i64> %mask, %y
1190  ret <2 x i64> %and
1191}
1192
1193define <4 x i32> @is_positive_mask_load_v4i32(<4 x i32> %x, ptr %p) {
1194; SSE-LABEL: is_positive_mask_load_v4i32:
1195; SSE:       # %bb.0:
1196; SSE-NEXT:    psrad $31, %xmm0
1197; SSE-NEXT:    pandn (%rdi), %xmm0
1198; SSE-NEXT:    retq
1199;
1200; AVX-LABEL: is_positive_mask_load_v4i32:
1201; AVX:       # %bb.0:
1202; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
1203; AVX-NEXT:    vpandn (%rdi), %xmm0, %xmm0
1204; AVX-NEXT:    retq
1205  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
1206  %mask = sext <4 x i1> %cmp to <4 x i32>
1207  %y = load <4 x i32>, ptr %p
1208  %and = and <4 x i32> %y, %mask
1209  ret <4 x i32> %and
1210}
1211
1212define <8 x i16> @is_positive_mask_load_v8i16(<8 x i16> %x, ptr %p) {
1213; SSE-LABEL: is_positive_mask_load_v8i16:
1214; SSE:       # %bb.0:
1215; SSE-NEXT:    psraw $15, %xmm0
1216; SSE-NEXT:    pandn (%rdi), %xmm0
1217; SSE-NEXT:    retq
1218;
1219; AVX-LABEL: is_positive_mask_load_v8i16:
1220; AVX:       # %bb.0:
1221; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
1222; AVX-NEXT:    vpandn (%rdi), %xmm0, %xmm0
1223; AVX-NEXT:    retq
1224  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1225  %mask = sext <8 x i1> %cmp to <8 x i16>
1226  %y = load <8 x i16>, ptr %p
1227  %and = and <8 x i16> %mask, %y
1228  ret <8 x i16> %and
1229}
1230
1231define <16 x i8> @is_positive_mask_load_v16i8(<16 x i8> %x, ptr %p) {
1232; SSE-LABEL: is_positive_mask_load_v16i8:
1233; SSE:       # %bb.0:
1234; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
1235; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
1236; SSE-NEXT:    pand (%rdi), %xmm0
1237; SSE-NEXT:    retq
1238;
1239; AVX-LABEL: is_positive_mask_load_v16i8:
1240; AVX:       # %bb.0:
1241; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1242; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
1243; AVX-NEXT:    vpand (%rdi), %xmm0, %xmm0
1244; AVX-NEXT:    retq
1245  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1246  %mask = sext <16 x i1> %cmp to <16 x i8>
1247  %y = load <16 x i8>, ptr %p
1248  %and = and <16 x i8> %y, %mask
1249  ret <16 x i8> %and
1250}
1251
1252define <4 x i64> @is_positive_mask_load_v4i64(<4 x i64> %x, ptr %p) {
1253; SSE2-LABEL: is_positive_mask_load_v4i64:
1254; SSE2:       # %bb.0:
1255; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1256; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
1257; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
1258; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1259; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
1260; SSE2-NEXT:    pand (%rdi), %xmm0
1261; SSE2-NEXT:    pand 16(%rdi), %xmm1
1262; SSE2-NEXT:    retq
1263;
1264; SSE42-LABEL: is_positive_mask_load_v4i64:
1265; SSE42:       # %bb.0:
1266; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
1267; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
1268; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
1269; SSE42-NEXT:    pand (%rdi), %xmm0
1270; SSE42-NEXT:    pand 16(%rdi), %xmm1
1271; SSE42-NEXT:    retq
1272;
1273; AVX1-LABEL: is_positive_mask_load_v4i64:
1274; AVX1:       # %bb.0:
1275; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1276; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1277; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm1, %xmm1
1278; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
1279; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1280; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
1281; AVX1-NEXT:    retq
1282;
1283; AVX2-LABEL: is_positive_mask_load_v4i64:
1284; AVX2:       # %bb.0:
1285; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1286; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
1287; AVX2-NEXT:    vpand (%rdi), %ymm0, %ymm0
1288; AVX2-NEXT:    retq
1289;
1290; AVX512-LABEL: is_positive_mask_load_v4i64:
1291; AVX512:       # %bb.0:
1292; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
1293; AVX512-NEXT:    vpandn (%rdi), %ymm0, %ymm0
1294; AVX512-NEXT:    retq
1295  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
1296  %mask = sext <4 x i1> %cmp to <4 x i64>
1297  %y = load <4 x i64>, ptr %p
1298  %and = and <4 x i64> %mask, %y
1299  ret <4 x i64> %and
1300}
1301
1302define <8 x i32> @is_positive_mask_load_v8i32(<8 x i32> %x, ptr %p) {
1303; SSE-LABEL: is_positive_mask_load_v8i32:
1304; SSE:       # %bb.0:
1305; SSE-NEXT:    psrad $31, %xmm0
1306; SSE-NEXT:    pandn (%rdi), %xmm0
1307; SSE-NEXT:    psrad $31, %xmm1
1308; SSE-NEXT:    pandn 16(%rdi), %xmm1
1309; SSE-NEXT:    retq
1310;
1311; AVX1-LABEL: is_positive_mask_load_v8i32:
1312; AVX1:       # %bb.0:
1313; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1314; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1315; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
1316; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
1317; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1318; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
1319; AVX1-NEXT:    retq
1320;
1321; AVX2-LABEL: is_positive_mask_load_v8i32:
1322; AVX2:       # %bb.0:
1323; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1324; AVX2-NEXT:    vpandn (%rdi), %ymm0, %ymm0
1325; AVX2-NEXT:    retq
1326;
1327; AVX512-LABEL: is_positive_mask_load_v8i32:
1328; AVX512:       # %bb.0:
1329; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
1330; AVX512-NEXT:    vpandn (%rdi), %ymm0, %ymm0
1331; AVX512-NEXT:    retq
1332  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
1333  %mask = sext <8 x i1> %cmp to <8 x i32>
1334  %y = load <8 x i32>, ptr %p
1335  %and = and <8 x i32> %y, %mask
1336  ret <8 x i32> %and
1337}
1338
1339define <16 x i16> @is_positive_mask_load_v16i16(<16 x i16> %x, ptr %p) {
1340; SSE-LABEL: is_positive_mask_load_v16i16:
1341; SSE:       # %bb.0:
1342; SSE-NEXT:    psraw $15, %xmm0
1343; SSE-NEXT:    pandn (%rdi), %xmm0
1344; SSE-NEXT:    psraw $15, %xmm1
1345; SSE-NEXT:    pandn 16(%rdi), %xmm1
1346; SSE-NEXT:    retq
1347;
1348; AVX1-LABEL: is_positive_mask_load_v16i16:
1349; AVX1:       # %bb.0:
1350; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1351; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1352; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm1, %xmm1
1353; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
1354; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1355; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
1356; AVX1-NEXT:    retq
1357;
1358; AVX2-LABEL: is_positive_mask_load_v16i16:
1359; AVX2:       # %bb.0:
1360; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
1361; AVX2-NEXT:    vpandn (%rdi), %ymm0, %ymm0
1362; AVX2-NEXT:    retq
1363;
1364; AVX512-LABEL: is_positive_mask_load_v16i16:
1365; AVX512:       # %bb.0:
1366; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
1367; AVX512-NEXT:    vpandn (%rdi), %ymm0, %ymm0
1368; AVX512-NEXT:    retq
1369  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1370  %mask = sext <16 x i1> %cmp to <16 x i16>
1371  %y = load <16 x i16>, ptr %p
1372  %and = and <16 x i16> %mask, %y
1373  ret <16 x i16> %and
1374}
1375
1376define <32 x i8> @is_positive_mask_load_v32i8(<32 x i8> %x, ptr %p) {
1377; SSE-LABEL: is_positive_mask_load_v32i8:
1378; SSE:       # %bb.0:
1379; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
1380; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
1381; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
1382; SSE-NEXT:    pand (%rdi), %xmm0
1383; SSE-NEXT:    pand 16(%rdi), %xmm1
1384; SSE-NEXT:    retq
1385;
1386; AVX1-LABEL: is_positive_mask_load_v32i8:
1387; AVX1:       # %bb.0:
1388; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1389; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1390; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
1391; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
1392; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1393; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
1394; AVX1-NEXT:    retq
1395;
1396; AVX2-LABEL: is_positive_mask_load_v32i8:
1397; AVX2:       # %bb.0:
1398; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1399; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
1400; AVX2-NEXT:    vpand (%rdi), %ymm0, %ymm0
1401; AVX2-NEXT:    retq
1402;
1403; AVX512-LABEL: is_positive_mask_load_v32i8:
1404; AVX512:       # %bb.0:
1405; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
1406; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
1407; AVX512-NEXT:    vpand (%rdi), %ymm0, %ymm0
1408; AVX512-NEXT:    retq
1409  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1410  %mask = sext <32 x i1> %cmp to <32 x i8>
1411  %y = load <32 x i8>, ptr %p
1412  %and = and <32 x i8> %y, %mask
1413  ret <32 x i8> %and
1414}
1415
1416define <2 x i1> @ispositive_mask_v2i64_v2i1(<2 x i64> %x, <2 x i1> %y) {
1417; SSE2-LABEL: ispositive_mask_v2i64_v2i1:
1418; SSE2:       # %bb.0:
1419; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1420; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
1421; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
1422; SSE2-NEXT:    pand %xmm1, %xmm0
1423; SSE2-NEXT:    retq
1424;
1425; SSE42-LABEL: ispositive_mask_v2i64_v2i1:
1426; SSE42:       # %bb.0:
1427; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
1428; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
1429; SSE42-NEXT:    pand %xmm1, %xmm0
1430; SSE42-NEXT:    retq
1431;
1432; AVX1-LABEL: ispositive_mask_v2i64_v2i1:
1433; AVX1:       # %bb.0:
1434; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1435; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
1436; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1437; AVX1-NEXT:    retq
1438;
1439; AVX2-LABEL: ispositive_mask_v2i64_v2i1:
1440; AVX2:       # %bb.0:
1441; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1442; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
1443; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
1444; AVX2-NEXT:    retq
1445;
1446; AVX512F-LABEL: ispositive_mask_v2i64_v2i1:
1447; AVX512F:       # %bb.0:
1448; AVX512F-NEXT:    vpsllq $63, %xmm1, %xmm1
1449; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1450; AVX512F-NEXT:    vpcmpgtq %xmm2, %xmm0, %k1
1451; AVX512F-NEXT:    vptestmq %xmm1, %xmm1, %k1 {%k1}
1452; AVX512F-NEXT:    vmovdqa64 %xmm2, %xmm0 {%k1} {z}
1453; AVX512F-NEXT:    retq
1454;
1455; AVX512DQBW-LABEL: ispositive_mask_v2i64_v2i1:
1456; AVX512DQBW:       # %bb.0:
1457; AVX512DQBW-NEXT:    vpsllq $63, %xmm1, %xmm1
1458; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1459; AVX512DQBW-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
1460; AVX512DQBW-NEXT:    vpcmpgtq %xmm3, %xmm0, %k1
1461; AVX512DQBW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k0 {%k1}
1462; AVX512DQBW-NEXT:    vpmovm2q %k0, %xmm0
1463; AVX512DQBW-NEXT:    retq
1464  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
1465  %and = and <2 x i1> %cmp, %y
1466  ret <2 x i1> %and
1467}
1468
define <4 x i1> @is_positive_mask_v4i32_v4i1(<4 x i32> %x, <4 x i1> %y) {
; SSE-LABEL: is_positive_mask_v4i32_v4i1:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v4i32_v4i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v4i32_v4i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v4i32_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm0, %k1
; AVX512F-NEXT:    vptestmd %xmm1, %xmm1, %k1 {%k1}
; AVX512F-NEXT:    vmovdqa32 %xmm2, %xmm0 {%k1} {z}
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v4i32_v4i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpmovd2m %xmm1, %k1
; AVX512DQBW-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2d %k0, %xmm0
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <4 x i1> %y, %cmp
  ret <4 x i1> %and
}

define <8 x i1> @is_positive_mask_v8i16_v8i1(<8 x i16> %x, <8 x i1> %y) {
; SSE-LABEL: is_positive_mask_v8i16_v8i1:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v8i16_v8i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v8i16_v8i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v8i16_v8i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v8i16_v8i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $15, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQBW-NEXT:    vpcmpgtw %xmm3, %xmm0, %k1
; AVX512DQBW-NEXT:    vpcmpgtw %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <8 x i1> %cmp, %y
  ret <8 x i1> %and
}

define <16 x i1> @is_positive_mask_v16i8_v16i1(<16 x i8> %x, <16 x i1> %y) {
; SSE-LABEL: is_positive_mask_v16i8_v16i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v16i8_v16i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v16i8_v16i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v16i8_v16i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v16i8_v16i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $7, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpmovb2m %xmm1, %k1
; AVX512DQBW-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2b %k0, %xmm0
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <16 x i1> %y, %cmp
  ret <16 x i1> %and
}

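; 256-bit variants: AVX1 splits the compare into 128-bit halves, while
; AVX2/AVX512 can compare the full ymm register.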
define <4 x i1> @is_positive_mask_v4i64_v4i1(<4 x i64> %x, <4 x i1> %y) {
; SSE2-LABEL: is_positive_mask_v4i64_v4i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: is_positive_mask_v4i64_v4i1:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    packssdw %xmm1, %xmm0
; SSE42-NEXT:    pand %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v4i64_v4i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v4i64_v4i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v4i64_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpgtq %ymm2, %ymm0, %k1
; AVX512F-NEXT:    vptestmd %xmm1, %xmm1, %k1 {%k1}
; AVX512F-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v4i64_v4i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512DQBW-NEXT:    vpcmpgtq %ymm3, %ymm0, %k1
; AVX512DQBW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2d %k0, %xmm0
; AVX512DQBW-NEXT:    vzeroupper
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %and = and <4 x i1> %cmp, %y
  ret <4 x i1> %and
}

define <8 x i1> @is_positive_mask_v8i32_v8i1(<8 x i32> %x, <8 x i1> %y) {
; SSE-LABEL: is_positive_mask_v8i32_v8i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm3, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v8i32_v8i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v8i32_v8i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v8i32_v8i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxwd %xmm1, %ymm1
; AVX512F-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm0, %k1
; AVX512F-NEXT:    vptestmd %ymm1, %ymm1, %k1 {%k1}
; AVX512F-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v8i32_v8i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $15, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpmovw2m %xmm1, %k1
; AVX512DQBW-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512DQBW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512DQBW-NEXT:    vzeroupper
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <8 x i1> %y, %cmp
  ret <8 x i1> %and
}

define <16 x i1> @is_positive_mask_v16i16_v16i1(<16 x i16> %x, <16 x i1> %y) {
; SSE-LABEL: is_positive_mask_v16i16_v16i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
; SSE-NEXT:    pcmpgtw %xmm3, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v16i16_v16i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v16i16_v16i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v16i16_v16i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v16i16_v16i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $7, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512DQBW-NEXT:    vpcmpgtw %ymm3, %ymm0, %k1
; AVX512DQBW-NEXT:    vpcmpgtb %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2b %k0, %xmm0
; AVX512DQBW-NEXT:    vzeroupper
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <16 x i1> %cmp, %y
  ret <16 x i1> %and
}

define <32 x i1> @is_positive_mask_v32i8_v32i1(<32 x i8> %x, <32 x i1> %y) {
; SSE2-LABEL: is_positive_mask_v32i8_v32i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %rax
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd %r9d, %xmm4
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT:    movd %r8d, %xmm2
; SSE2-NEXT:    movd %ecx, %xmm3
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd %edx, %xmm6
; SSE2-NEXT:    movd %esi, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm7 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm6[0]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm3, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm7, %xmm1
; SSE2-NEXT:    psllw $7, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    psllw $7, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %edx
; SSE2-NEXT:    orl %ecx, %edx
; SSE2-NEXT:    movl %edx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE42-LABEL: is_positive_mask_v32i8_v32i1:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movq %rdi, %rax
; SSE42-NEXT:    movd %esi, %xmm2
; SSE42-NEXT:    pinsrb $1, %edx, %xmm2
; SSE42-NEXT:    pinsrb $2, %ecx, %xmm2
; SSE42-NEXT:    pinsrb $3, %r8d, %xmm2
; SSE42-NEXT:    pinsrb $4, %r9d, %xmm2
; SSE42-NEXT:    pinsrb $5, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE42-NEXT:    pinsrb $1, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $2, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $3, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $4, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $5, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE42-NEXT:    pcmpgtb %xmm4, %xmm1
; SSE42-NEXT:    pand %xmm3, %xmm1
; SSE42-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE42-NEXT:    pand %xmm2, %xmm0
; SSE42-NEXT:    psllw $7, %xmm0
; SSE42-NEXT:    pmovmskb %xmm0, %ecx
; SSE42-NEXT:    psllw $7, %xmm1
; SSE42-NEXT:    pmovmskb %xmm1, %edx
; SSE42-NEXT:    shll $16, %edx
; SSE42-NEXT:    orl %ecx, %edx
; SSE42-NEXT:    movl %edx, (%rdi)
; SSE42-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v32i8_v32i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v32i8_v32i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v32i8_v32i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v32i8_v32i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $7, %ymm1, %ymm1
; AVX512DQBW-NEXT:    vpmovb2m %ymm1, %k1
; AVX512DQBW-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512DQBW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2b %k0, %ymm0
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <32 x i1> %y, %cmp
  ret <32 x i1> %and
}

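; PR52504: 'select (icmp sgt x, -1), x, -1' is equivalent to smax(x, -1),
; which AVX512 can lower directly to vpmaxsq.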
define <4 x i64> @PR52504(<4 x i16> %t3) {
; SSE2-LABEL: PR52504:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: PR52504:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE42-NEXT:    pmovsxwq %xmm1, %xmm2
; SSE42-NEXT:    pmovsxwq %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm1
; SSE42-NEXT:    pxor %xmm0, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    por %xmm3, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    por %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: PR52504:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm3
; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR52504:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwq %xmm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR52504:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxwq %xmm0, %ymm0
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t14 = sext <4 x i16> %t3 to <4 x i64>
  %t15 = icmp sgt <4 x i64> %t14, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t16 = select <4 x i1> %t15, <4 x i64> %t14, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %t16
}
