; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; PR33276 - https://bugs.llvm.org/show_bug.cgi?id=33276
; If both operands of an unsigned icmp are known non-negative, then
; we don't need to flip the sign bits in order to map to signed pcmpgt*.
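;
; Lowering sketch (explanatory comment only, not checked by FileCheck): x86
; has no unsigned vector compares before AVX-512, so 'icmp ugt' is normally
; lowered by biasing both sides with the sign-bit mask and doing a signed
; compare:
;   x ugt y  <=>  (x ^ SignBit) sgt (y ^ SignBit)
; That bias is the pxor against the 2147483648 (0x80000000) splat in the
; v2i64 outputs below. After 'lshr %x, 1' the sign bit is known zero, so the
; bias can be dropped and the compare maps straight onto pcmpgt*, as in the
; v4i32/v8i16 outputs and the AVX v2i64 output.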

define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ugt_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ugt_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ugt_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ugt <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}
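;
; Note (explanatory comment only, not checked): pcmpgtq was introduced in
; SSE4.2, so the SSE2/SSE41 runs above emulate the 64-bit compare with 32-bit
; ops, roughly:
;   hi(x) sgt hi(y)  |  (hi(x) == hi(y)  &  lo(x) ugt lo(y))
; pcmpgtd/pcmpeqd compute the per-dword results and the pshufd/pand/por
; sequence combines them per qword. The remaining pxor bias only needs to
; flip the low dwords' sign bits (hence the 0x80000000-per-qword constant);
; after the shift the high dwords are already known non-negative.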

define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ult_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ult_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ult_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ult <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: uge_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp uge <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}
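;
; Note (explanatory comment only, not checked): there is no greater-or-equal
; vector compare, so uge is lowered as the complement of ult:
;   x uge y  <=>  not (x ult y)
; which is why the uge output above (and the ule output below) ends with
; pcmpeqd-against-itself to make all-ones followed by pxor, an idiomatic
; vector NOT.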

define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ule_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ule <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ugt_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ugt <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ult_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ult <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: uge_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pmaxud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp uge <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: ule_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ule <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}
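;
; Note (explanatory comment only, not checked): with SSE4.1's unsigned
; min/max the inversion can be avoided entirely:
;   x uge y  <=>  umax(x, y) == x        x ule y  <=>  umin(x, y) == x
; hence the pmaxud/pminud + pcmpeqd pairs in the SSE41 and AVX outputs of the
; v4i32 uge/ule tests above.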

define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ugt_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ugt <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ult_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ult <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: uge_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp uge <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: ule_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pminuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ule <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ugt_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ugt_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ugt <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}
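;
; Note (explanatory comment only, not checked): x86 has no vector shift for
; i8 elements, so the v16i8 tests shift as words and then mask away the bits
; that crossed a byte boundary:
;   lshr <16 x i8> %x, 1  ->  psrlw $1  +  pand with a 127 (0x7f) splat
; which is the pand against the [127,...] constant in the outputs above.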

define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ult_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ult_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ult <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @uge_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: uge_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pmaxub %xmm0, %xmm2
; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uge_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uge_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp uge <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ule_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm0, %xmm2
; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ule_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ule_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ule <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <8 x i16> @PR47448_uge(i16 signext %0) {
; SSE2-LABEL: PR47448_uge:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andl $7, %edi
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: PR47448_uge:
; SSE41:       # %bb.0:
; SSE41-NEXT:    andl $7, %edi
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: PR47448_uge:
; AVX1:       # %bb.0:
; AVX1-NEXT:    andl $7, %edi
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR47448_uge:
; AVX2:       # %bb.0:
; AVX2-NEXT:    andl $7, %edi
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %2 = and i16 %0, 7
  %3 = insertelement <8 x i16> undef, i16 %2, i32 0
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
  %5 = icmp uge <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %6 = sext <8 x i1> %5 to <8 x i16>
  ret <8 x i16> %6
}
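;
; Note (explanatory comment only, not checked): 'andl $7' bounds the splatted
; value to [0,7], so both compare operands are known non-negative as i16 and
; the uge against the constant vector can use the pmaxuw-and-compare-equal
; trick (SSE41/AVX) or an inverted signed pcmpgtw (SSE2) with no sign-bit
; bias at all.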

define <8 x i16> @PR47448_ugt(i16 signext %0) {
; SSE-LABEL: PR47448_ugt:
; SSE:       # %bb.0:
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR47448_ugt:
; AVX1:       # %bb.0:
; AVX1-NEXT:    andl $7, %edi
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR47448_ugt:
; AVX2:       # %bb.0:
; AVX2-NEXT:    andl $7, %edi
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
  %2 = and i16 %0, 7
  %3 = insertelement <8 x i16> undef, i16 %2, i32 0
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
  %5 = icmp ugt <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %6 = sext <8 x i1> %5 to <8 x i16>
  ret <8 x i16> %6
}

; Recognise the knownbits from X86ISD::AND in previous block.
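; The AVX1 lowering below is worth noting (explanatory comment only, not
; checked): AVX1 lacks 256-bit integer compares, and because 'and $7' bounds
; every element to [0,7], the vectors convert losslessly to float, so the
; compare can be done with vcvtdq2ps + vcmpltps instead of being split into
; two 128-bit pcmpgtd ops.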
define void @PR54171(ptr %mask0, ptr %mask1, i64 %i) {
; SSE-LABEL: PR54171:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    andq $7, %rdx
; SSE-NEXT:    je .LBB18_2
; SSE-NEXT:  # %bb.1: # %if.then
; SSE-NEXT:    movd %edx, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    movdqa %xmm2, (%rdi)
; SSE-NEXT:    movdqa %xmm1, 16(%rdi)
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    movdqa %xmm1, 16(%rsi)
; SSE-NEXT:  .LBB18_2: # %if.end
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR54171:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    andq $7, %rdx
; AVX1-NEXT:    je .LBB18_2
; AVX1-NEXT:  # %bb.1: # %if.then
; AVX1-NEXT:    vmovd %edx, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [0.0E+0,0.0E+0,1.0E+0,1.0E+0,2.0E+0,2.0E+0,3.0E+0,3.0E+0]
; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,4.0E+0,5.0E+0,5.0E+0,6.0E+0,6.0E+0,7.0E+0,7.0E+0]
; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, (%rsi)
; AVX1-NEXT:  .LBB18_2: # %if.end
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR54171:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    andq $7, %rdx
; AVX2-NEXT:    je .LBB18_2
; AVX2-NEXT:  # %bb.1: # %if.then
; AVX2-NEXT:    vmovd %edx, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
; AVX2-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, (%rsi)
; AVX2-NEXT:  .LBB18_2: # %if.end
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
entry:
  %sub = and i64 %i, 7
  %cmp.not = icmp eq i64 %sub, 0
  br i1 %cmp.not, label %if.end, label %if.then

if.then:
  %conv = trunc i64 %sub to i32
  %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv, i64 0
  %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> poison, <8 x i32> zeroinitializer
  %cmp.i = icmp ugt <8 x i32> %vecinit7.i.i, <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  %sext.i = sext <8 x i1> %cmp.i to <8 x i32>
  store <8 x i32> %sext.i, ptr %mask0, align 32
  %cmp.i18 = icmp ugt <8 x i32> %vecinit7.i.i, <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  %sext.i19 = sext <8 x i1> %cmp.i18 to <8 x i32>
  store <8 x i32> %sext.i19, ptr %mask1, align 32
  br label %if.end

if.end:
  ret void
}