; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX

declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone
declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone


declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
declare { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8>, <4 x i8>) nounwind readnone

; fold (ssub x, 0) -> x
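; Subtracting zero can never overflow a signed value, so the overflow bit is
; known false, the select always picks the subtraction result, and the whole
; sequence folds to %a0.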
define i32 @combine_ssub_zero(i32 %a0, i32 %a1) {
; CHECK-LABEL: combine_ssub_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a0, i32 zeroinitializer)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_ssub_zero(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: combine_vec_ssub_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (usub x, 0) -> x
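; Subtracting zero can never borrow, so the overflow bit is known false and
; the whole sequence folds to %a0.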
define i32 @combine_usub_zero(i32 %a0, i32 %a1) {
; CHECK-LABEL: combine_usub_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a0, i32 zeroinitializer)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: combine_vec_usub_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (ssub x, x) -> 0
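; x - x is always 0 and can never overflow, so the whole sequence folds to
; the constant 0 (materialized below with a self-xor).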
define i32 @combine_ssub_self(i32 %a0, i32 %a1) {
; CHECK-LABEL: combine_ssub_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a0, i32 %a0)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_ssub_self(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_ssub_self:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_ssub_self:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (usub x, x) -> 0
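; x - x can never borrow, so both the result and the overflow bit fold to
; constant zero.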
define i32 @combine_usub_self(i32 %a0, i32 %a1) {
; CHECK-LABEL: combine_usub_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a0, i32 %a0)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_self:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_usub_self:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

; fold (usub -1, x) -> (xor x, -1) + no borrow
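; -1 is the maximum unsigned value, so -1 - x can never borrow, and the
; subtraction itself simplifies to bitwise NOT (0xFFFFFFFF - x == ~x).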
define i32 @combine_usub_negone(i32 %a0, i32 %a1) {
; CHECK-LABEL: combine_usub_negone:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    notl %eax
; CHECK-NEXT:    retq
  %1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 -1, i32 %a0)
  %2 = extractvalue {i32, i1} %1, 0
  %3 = extractvalue {i32, i1} %1, 1
  %4 = select i1 %3, i32 %a1, i32 %2
  ret i32 %4
}

define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_usub_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
  %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
  %3 = extractvalue {<4 x i32>, <4 x i1>} %1, 1
  %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
  ret <4 x i32> %4
}

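; %aa has its sign bit known set (>= 0x80000000) and %bb has it known clear
; (<= 0x7FFFFFFF), so %aa >= %bb and the subtraction can never borrow; the
; overflow bit folds to constant false (xorl %edx, %edx).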
define { i32, i1 } @combine_usub_nuw(i32 %a, i32 %b) {
; CHECK-LABEL: combine_usub_nuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    orl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT:    andl $2147483647, %esi # imm = 0x7FFFFFFF
; CHECK-NEXT:    subl %esi, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retq
  %aa = or i32 %a, 2147483648
  %bb = and i32 %b, 2147483647
  %x = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %aa, i32 %bb)
  ret { i32, i1 } %x
}

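; %y = %x | 64 is known to be >= 64, so 63 - %y always borrows and the
; overflow result is always true.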
define { i8, i1 } @usub_always_overflow(i8 %x) nounwind {
; CHECK-LABEL: usub_always_overflow:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orb $64, %dil
; CHECK-NEXT:    movb $63, %al
; CHECK-NEXT:    subb %dil, %al
; CHECK-NEXT:    setb %dl
; CHECK-NEXT:    retq
  %y = or i8 %x, 64
  %a = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 63, i8 %y)
  ret { i8, i1 } %a
}

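; The select clamps %y to be >= 29, and -100 - 29 = -129 is below the i8
; minimum of -128, so the signed subtraction always overflows.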
define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind {
; CHECK-LABEL: ssub_always_overflow:
; CHECK:       # %bb.0:
; CHECK-NEXT:    cmpb $30, %dil
; CHECK-NEXT:    movl $29, %ecx
; CHECK-NEXT:    cmovgel %edi, %ecx
; CHECK-NEXT:    movb $-100, %al
; CHECK-NEXT:    subb %cl, %al
; CHECK-NEXT:    seto %dl
; CHECK-NEXT:    retq
  %c = icmp sgt i8 %x, 29
  %y = select i1 %c, i8 %x, i8 29
  %a = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 -100, i8 %y)
  ret { i8, i1 } %a
}

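; 0 - 1 borrows in every lane: the result wraps to all-ones (255) and the
; overflow mask is all true, so both return values become all-ones registers
; (pcmpeqd with itself).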
define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind {
; SSE-LABEL: always_usub_const_vector:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: always_usub_const_vector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    retq
  %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 0, i8 0, i8 0, i8 0>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>)
  ret { <4 x i8>, <4 x i1> } %x
}

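; 255 - {128, 0, 255, 1} = {127, 255, 0, 254} with no borrow in any lane, so
; the result is the constant loaded below and the overflow mask is all false
; (a zeroed register).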
define { <4 x i8>, <4 x i1> } @never_usub_const_vector() nounwind {
; SSE-LABEL: never_usub_const_vector:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = [127,255,0,254,0,0,0,0,0,0,0,0,0,0,0,0]
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: never_usub_const_vector:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [127,255,0,254,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    retq
  %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 255, i8 255, i8 255, i8 255>, <4 x i8> <i8 128, i8 0, i8 255, i8 1>)
  ret { <4 x i8>, <4 x i1> } %x
}