; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64

%WideUInt32 = type { i32, i32 }

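; PR25858: a two-limb unsigned subtraction built from usub.with.overflow; the
; borrow out of the low half should fold into a single SBB on the high half.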
define void @PR25858_i32(ptr sret(%WideUInt32), ptr, ptr) nounwind {
; X86-LABEL: PR25858_i32:
; X86:       # %bb.0: # %top
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %esi
; X86-NEXT:    movl 4(%edx), %edx
; X86-NEXT:    subl (%ecx), %esi
; X86-NEXT:    sbbl 4(%ecx), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %esi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl $4
;
; X64-LABEL: PR25858_i32:
; X64:       # %bb.0: # %top
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movl (%rsi), %ecx
; X64-NEXT:    movl 4(%rsi), %esi
; X64-NEXT:    subl (%rdx), %ecx
; X64-NEXT:    sbbl 4(%rdx), %esi
; X64-NEXT:    movl %esi, 4(%rdi)
; X64-NEXT:    movl %ecx, (%rdi)
; X64-NEXT:    retq
top:
  %3 = load i32, ptr %1, align 4
  %4 = load i32, ptr %2, align 4
  %5 = sub i32 %3, %4
  %6 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %4)
  %7 = extractvalue { i32, i1 } %6, 1
  %8 = getelementptr inbounds %WideUInt32, ptr %1, i32 0, i32 1
  %9 = load i32, ptr %8, align 8
  %10 = getelementptr inbounds %WideUInt32, ptr %2, i32 0, i32 1
  %11 = load i32, ptr %10, align 8
  %12 = sub i32 %9, %11
  %.neg1 = sext i1 %7 to i32
  %13 = add i32 %12, %.neg1
  %14 = insertvalue %WideUInt32 undef, i32 %5, 0
  %15 = insertvalue %WideUInt32 %14, i32 %13, 1
  store %WideUInt32 %15, ptr %0, align 4
  ret void
}

declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)

%WideUInt64 = type { i64, i64 }

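; The same combine at i64 width: a single SUB/SBB pair on x86-64. On i686 each
; i64 half is split again, so the cross-half borrow is applied with SBB $0s.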
define void @PR25858_i64(ptr sret(%WideUInt64), ptr, ptr) nounwind {
; X86-LABEL: PR25858_i64:
; X86:       # %bb.0: # %top
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl (%edi), %edx
; X86-NEXT:    movl 4(%edi), %esi
; X86-NEXT:    movl 12(%edi), %ecx
; X86-NEXT:    movl 8(%edi), %edi
; X86-NEXT:    subl 8(%ebx), %edi
; X86-NEXT:    sbbl 12(%ebx), %ecx
; X86-NEXT:    subl (%ebx), %edx
; X86-NEXT:    sbbl 4(%ebx), %esi
; X86-NEXT:    sbbl $0, %edi
; X86-NEXT:    sbbl $0, %ecx
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, 8(%eax)
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl $4
;
; X64-LABEL: PR25858_i64:
; X64:       # %bb.0: # %top
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq (%rsi), %rcx
; X64-NEXT:    movq 8(%rsi), %rsi
; X64-NEXT:    subq (%rdx), %rcx
; X64-NEXT:    sbbq 8(%rdx), %rsi
; X64-NEXT:    movq %rsi, 8(%rdi)
; X64-NEXT:    movq %rcx, (%rdi)
; X64-NEXT:    retq
top:
  %3 = load i64, ptr %1, align 8
  %4 = load i64, ptr %2, align 8
  %5 = sub i64 %3, %4
  %6 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %3, i64 %4)
  %7 = extractvalue { i64, i1 } %6, 1
  %8 = getelementptr inbounds %WideUInt64, ptr %1, i64 0, i32 1
  %9 = load i64, ptr %8, align 8
  %10 = getelementptr inbounds %WideUInt64, ptr %2, i64 0, i32 1
  %11 = load i64, ptr %10, align 8
  %12 = sub i64 %9, %11
  %.neg1 = sext i1 %7 to i64
  %13 = add i64 %12, %.neg1
  %14 = insertvalue %WideUInt64 undef, i64 %5, 0
  %15 = insertvalue %WideUInt64 %14, i64 %13, 1
  store %WideUInt64 %15, ptr %0, align 8
  ret void
}

declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)

; PR24545 less_than_ideal()
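; A wide unsigned compare assembled from subborrow should lower to
; CMP + SBB + SETB rather than materializing the intermediate borrow.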
define i8 @PR24545(i32, i32, ptr nocapture readonly) {
; X86-LABEL: PR24545:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    cmpl (%ecx), %edx
; X86-NEXT:    sbbl 4(%ecx), %eax
; X86-NEXT:    setb %al
; X86-NEXT:    retl
;
; X64-LABEL: PR24545:
; X64:       # %bb.0:
; X64-NEXT:    cmpl (%rdx), %edi
; X64-NEXT:    sbbl 4(%rdx), %esi
; X64-NEXT:    setb %al
; X64-NEXT:    retq
  %4 = load i32, ptr %2
  %5 = icmp ugt i32 %4, %0
  %6 = zext i1 %5 to i8
  %7 = getelementptr inbounds i32, ptr %2, i32 1
  %8 = load i32, ptr %7
  %9 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 %6, i32 %1, i32 %8)
  %10 = extractvalue { i8, i32 } %9, 0
  %11 = icmp ne i8 %10, 0
  %12 = zext i1 %11 to i8
  ret i8 %12
}

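; PR40483: combines on llvm.x86.subborrow.32 with a known-zero incoming borrow.
; Here the subborrow difference and its negation cancel, so the return value
; folds to zero and only the in-memory subtract survives.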
define i32 @PR40483_sub1(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %eax, (%ecx)
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub1:
; X64:       # %bb.0:
; X64-NEXT:    subl %esi, (%rdi)
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 4
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 4
  %6 = sub i32 %1, %3
  %7 = add i32 %6, %5
  ret i32 %7
}

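; Here the plain IR sub and the subborrow compute the same difference, so the
; returned value again folds to zero, matching sub1.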
define i32 @PR40483_sub2(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %eax, (%ecx)
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub2:
; X64:       # %bb.0:
; X64-NEXT:    subl %esi, (%rdi)
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 4
  %4 = sub i32 %3, %1
  %5 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %6 = extractvalue { i8, i32 } %5, 1
  store i32 %6, ptr %0, align 4
  %7 = sub i32 %4, %6
  ret i32 %7
}

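; The borrow flag feeds a live select here, so flag-dependent code must
; remain: a branch on i686 and a CMOV on x86-64.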
define i32 @PR40483_sub3(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub3:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    subl %esi, %ecx
; X86-NEXT:    subl %esi, %edx
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    jae .LBB5_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
; X86-NEXT:  .LBB5_1:
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub3:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movl %eax, %edx
; X64-NEXT:    negl %edx
; X64-NEXT:    orl %eax, %edx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    subl %esi, %ecx
; X64-NEXT:    movl %ecx, (%rdi)
; X64-NEXT:    cmovael %edx, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %1, %3
  %9 = or i32 %5, %8
  %10 = select i1 %7, i32 %9, i32 0
  ret i32 %10
}

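; The OR of the subborrow result with the equivalent plain sub folds away,
; leaving a simple select between zero and the stored difference.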
define i32 @PR40483_sub4(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%edx)
; X86-NEXT:    jae .LBB6_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB6_2:
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub4:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movl %eax, (%rdi)
; X64-NEXT:    cmovael %ecx, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %3, %1
  %9 = or i32 %5, %8
  %10 = select i1 %7, i32 0, i32 %9
  ret i32 %10
}

; Verify that a bogus cmov is simplified.
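; Both select arms reduce to zero, so the CMOV disappears and only the
; in-memory subtract plus a zeroed return remain.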

define i32 @PR40483_sub5(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub5:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %eax, (%ecx)
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub5:
; X64:       # %bb.0:
; X64-NEXT:    subl %esi, (%rdi)
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %1, %3
  %9 = add i32 %8, %5
  %10 = select i1 %7, i32 %9, i32 0
  ret i32 %10
}

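; One select arm is twice the stored difference (materialized with LEA), so a
; genuine flag-dependent select survives here.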
define i32 @PR40483_sub6(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub6:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%edx)
; X86-NEXT:    jae .LBB8_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    leal (%ecx,%ecx), %eax
; X86-NEXT:  .LBB8_2:
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub6:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movl %eax, (%rdi)
; X64-NEXT:    leal (%rax,%rax), %eax
; X64-NEXT:    cmovael %ecx, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %3, %1
  %9 = add i32 %8, %5
  %10 = select i1 %7, i32 0, i32 %9
  ret i32 %10
}

declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)