xref: /llvm-project/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll (revision e0ed0333f0fed2e73f805afd58b61176a87aa3ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefixes=CHECK,CHECK-V7
3; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi | FileCheck %s -check-prefixes=CHECK,CHECK-V8
4
5
; Returns a - b when a > b (signed), otherwise 0.
; The compare and the subtract use the same operands in the same order; the
; expected output is one flag-setting subs followed by a conditional movle,
; with no separate cmp instruction.
6define i32 @f(i32 %a, i32 %b) nounwind ssp {
7; CHECK-LABEL: f:
8; CHECK:       @ %bb.0: @ %entry
9; CHECK-NEXT:    subs r0, r0, r1
10; CHECK-NEXT:    movle r0, #0
11; CHECK-NEXT:    bx lr
12entry:
13  %cmp = icmp sgt i32 %a, %b
14  %sub = sub nsw i32 %a, %b
15  %sub. = select i1 %cmp, i32 %sub, i32 0
16  ret i32 %sub.
17}
18
; Returns b - a when a < b (signed), otherwise 0.
; Here the subtract operands are swapped relative to the compare operands; the
; expected output is still one flag-setting subs (r1 - r0) followed by a
; conditional movle, with no separate cmp instruction.
19define i32 @g(i32 %a, i32 %b) nounwind ssp {
20; CHECK-LABEL: g:
21; CHECK:       @ %bb.0: @ %entry
22; CHECK-NEXT:    subs r0, r1, r0
23; CHECK-NEXT:    movle r0, #0
24; CHECK-NEXT:    bx lr
25entry:
26  %cmp = icmp slt i32 %a, %b
27  %sub = sub nsw i32 %b, %a
28  %sub. = select i1 %cmp, i32 %sub, i32 0
29  ret i32 %sub.
30}
31
; Returns a - 3 when a > 3 (signed), otherwise b.
; Immediate-operand variant: the expected output is a flag-setting subs with
; the immediate #3 followed by a conditional movle, with no separate cmp.
32define i32 @h(i32 %a, i32 %b) nounwind ssp {
33; CHECK-LABEL: h:
34; CHECK:       @ %bb.0: @ %entry
35; CHECK-NEXT:    subs r0, r0, #3
36; CHECK-NEXT:    movle r0, r1
37; CHECK-NEXT:    bx lr
38entry:
39  %cmp = icmp sgt i32 %a, 3
40  %sub = sub nsw i32 %a, 3
41  %sub. = select i1 %cmp, i32 %sub, i32 %b
42  ret i32 %sub.
43}
44
45; rdar://11725965
; Returns b - a when a < b (unsigned), otherwise 0.
; Unsigned variant with swapped subtract operands and no nsw flag on the sub;
; the expected output is a flag-setting subs followed by a conditional movls,
; with no separate cmp instruction.
46define i32 @i(i32 %a, i32 %b) nounwind readnone ssp {
47; CHECK-LABEL: i:
48; CHECK:       @ %bb.0: @ %entry
49; CHECK-NEXT:    subs r0, r1, r0
50; CHECK-NEXT:    movls r0, #0
51; CHECK-NEXT:    bx lr
52entry:
53  %cmp = icmp ult i32 %a, %b
54  %sub = sub i32 %b, %a
55  %sub. = select i1 %cmp, i32 %sub, i32 0
56  ret i32 %sub.
57}
58
59; If CPSR is live-out, we can't remove cmp if there exists
60; a swapped sub.
; The entry block computes a - b and branches on b == a. The if.then block
; then compares b > a (signed) and selects between the difference and a; the
; if.else block returns the difference directly.
; NOTE(review): the expected output below contains no cmp instruction (only
; subs, movlt, movne and bx), so confirm that the comment above still
; describes what this test is guarding.
61define i32 @j(i32 %a, i32 %b) nounwind {
62; CHECK-LABEL: j:
63; CHECK:       @ %bb.0: @ %entry
64; CHECK-NEXT:    subs r1, r0, r1
65; CHECK-NEXT:    movlt r0, r1
66; CHECK-NEXT:    movne r0, r1
67; CHECK-NEXT:    bx lr
68entry:
69  %cmp = icmp eq i32 %b, %a
70  %sub = sub nsw i32 %a, %b
71  br i1 %cmp, label %if.then, label %if.else
72
73if.then:
74  %cmp2 = icmp sgt i32 %b, %a
75  %sel = select i1 %cmp2, i32 %sub, i32 %a
76  ret i32 %sel
77
78if.else:
79  ret i32 %sub
80}
81
82; If the sub/rsb instruction is predicated, we can't use the flags.
83; <rdar://problem/12263428>
84; Test case from MultiSource/Benchmarks/Ptrdist/bc/number.s
; Returns 17 when the value selected on %cond equals zero and 23 otherwise.
; The expected output has a conditionally executed, non-flag-setting rsbeq
; followed by an explicit cmp r1, #0 that feeds the final movweq.
85define i32 @bc_raise(i1 %cond) nounwind ssp {
86; CHECK-LABEL: bc_raise:
87; CHECK:       @ %bb.0: @ %entry
88; CHECK-NEXT:    mov r1, #1
89; CHECK-NEXT:    tst r0, #1
90; CHECK-NEXT:    bic r1, r1, r0
91; CHECK-NEXT:    mov r0, #23
92; CHECK-NEXT:    rsbeq r1, r1, #0
93; CHECK-NEXT:    cmp r1, #0
94; CHECK-NEXT:    movweq r0, #17
95; CHECK-NEXT:    bx lr
96entry:
97  %val.2.i = select i1 %cond, i32 0, i32 1
98  %sub.i = sub nsw i32 0, %val.2.i
99  %retval.0.i = select i1 %cond, i32 %val.2.i, i32 %sub.i
100  %cmp1 = icmp eq i32 %retval.0.i, 0
101  br i1 %cmp1, label %land.lhs.true, label %if.end11
102
103land.lhs.true:                                    ; preds = %entry
104  ret i32 17
105
106if.end11:                                         ; preds = %entry
107  ret i32 23
108}
109
110; When considering the producer of cmp's src as the subsuming instruction,
111; only consider that when the comparison is to 0.
; Returns x when a - b == 17, otherwise y.
; Because the difference is compared against 17 rather than 0, the expected
; output keeps a plain sub followed by an explicit cmp r0, #17.
112define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) {
113; CHECK-LABEL: cmp_src_nonzero:
114; CHECK:       @ %bb.0: @ %entry
115; CHECK-NEXT:    sub r0, r0, r1
116; CHECK-NEXT:    cmp r0, #17
117; CHECK-NEXT:    movne r2, r3
118; CHECK-NEXT:    mov r0, r2
119; CHECK-NEXT:    bx lr
120entry:
121  %sub = sub i32 %a, %b
122  %cmp = icmp eq i32 %sub, 17
123  %ret = select i1 %cmp, i32 %x, i32 %y
124  ret i32 %ret
125}
126
; Returns x when a - b == 0, otherwise y (float select).
; In both configurations the expected output uses a flag-setting subs and no
; separate cmp. The armv7 run expects a conditional vmoveq.f32, while the
; armv8 run expects vseleq.f32.
127define float @float_sel(i32 %a, i32 %b, float %x, float %y) {
128; CHECK-V7-LABEL: float_sel:
129; CHECK-V7:       @ %bb.0: @ %entry
130; CHECK-V7-NEXT:    vmov s2, r2
131; CHECK-V7-NEXT:    subs r0, r0, r1
132; CHECK-V7-NEXT:    vmov s0, r3
133; CHECK-V7-NEXT:    vmoveq.f32 s0, s2
134; CHECK-V7-NEXT:    vmov r0, s0
135; CHECK-V7-NEXT:    bx lr
136;
137; CHECK-V8-LABEL: float_sel:
138; CHECK-V8:       @ %bb.0: @ %entry
139; CHECK-V8-NEXT:    subs r0, r0, r1
140; CHECK-V8-NEXT:    vmov s0, r3
141; CHECK-V8-NEXT:    vmov s2, r2
142; CHECK-V8-NEXT:    vseleq.f32 s0, s2, s0
143; CHECK-V8-NEXT:    vmov r0, s0
144; CHECK-V8-NEXT:    bx lr
145entry:
146  %sub = sub i32 %a, %b
147  %cmp = icmp eq i32 %sub, 0
148  %ret = select i1 %cmp, float %x, float %y
149  ret float %ret
150}
151
; Returns x when a - b == 0, otherwise y (double select).
; In both configurations the expected output uses a flag-setting subs and no
; separate cmp. The armv7 run expects a conditional vmoveq.f64, while the
; armv8 run expects vseleq.f64. The expected output loads y from [sp].
152define double @double_sel(i32 %a, i32 %b, double %x, double %y) {
153; CHECK-V7-LABEL: double_sel:
154; CHECK-V7:       @ %bb.0: @ %entry
155; CHECK-V7-NEXT:    vmov d17, r2, r3
156; CHECK-V7-NEXT:    vldr d16, [sp]
157; CHECK-V7-NEXT:    subs r0, r0, r1
158; CHECK-V7-NEXT:    vmoveq.f64 d16, d17
159; CHECK-V7-NEXT:    vmov r0, r1, d16
160; CHECK-V7-NEXT:    bx lr
161;
162; CHECK-V8-LABEL: double_sel:
163; CHECK-V8:       @ %bb.0: @ %entry
164; CHECK-V8-NEXT:    vldr d16, [sp]
165; CHECK-V8-NEXT:    vmov d17, r2, r3
166; CHECK-V8-NEXT:    subs r0, r0, r1
167; CHECK-V8-NEXT:    vseleq.f64 d16, d17, d16
168; CHECK-V8-NEXT:    vmov r0, r1, d16
169; CHECK-V8-NEXT:    bx lr
170entry:
171  %sub = sub i32 %a, %b
172  %cmp = icmp eq i32 %sub, 0
173  %ret = select i1 %cmp, double %x, double %y
174  ret double %ret
175}
176
; Global i32 used by the tests in this file: double_sub and double_sub_swap
; store to it, and cmp_slt0 and cmp_ult0 load from it.
177@t = common global i32 0
; Stores a - b to @t and returns x when a > b (signed), otherwise y.
; In both configurations the expected output keeps a separate cmp r0, r1
; alongside a plain (non-flag-setting) sub. The armv7 run expects vmovgt.f64
; and the armv8 run expects vselgt.f64.
178define double @double_sub(i32 %a, i32 %b, double %x, double %y) {
179; CHECK-V7-LABEL: double_sub:
180; CHECK-V7:       @ %bb.0: @ %entry
181; CHECK-V7-NEXT:    vmov d17, r2, r3
182; CHECK-V7-NEXT:    cmp r0, r1
183; CHECK-V7-NEXT:    vldr d16, [sp]
184; CHECK-V7-NEXT:    sub r0, r0, r1
185; CHECK-V7-NEXT:    vmovgt.f64 d16, d17
186; CHECK-V7-NEXT:    movw r1, :lower16:t
187; CHECK-V7-NEXT:    movt r1, :upper16:t
188; CHECK-V7-NEXT:    str r0, [r1]
189; CHECK-V7-NEXT:    vmov r2, r3, d16
190; CHECK-V7-NEXT:    mov r0, r2
191; CHECK-V7-NEXT:    mov r1, r3
192; CHECK-V7-NEXT:    bx lr
193;
194; CHECK-V8-LABEL: double_sub:
195; CHECK-V8:       @ %bb.0: @ %entry
196; CHECK-V8-NEXT:    vldr d16, [sp]
197; CHECK-V8-NEXT:    cmp r0, r1
198; CHECK-V8-NEXT:    vmov d17, r2, r3
199; CHECK-V8-NEXT:    sub r0, r0, r1
200; CHECK-V8-NEXT:    vselgt.f64 d16, d17, d16
201; CHECK-V8-NEXT:    movw r1, :lower16:t
202; CHECK-V8-NEXT:    vmov r2, r3, d16
203; CHECK-V8-NEXT:    movt r1, :upper16:t
204; CHECK-V8-NEXT:    str r0, [r1]
205; CHECK-V8-NEXT:    mov r0, r2
206; CHECK-V8-NEXT:    mov r1, r3
207; CHECK-V8-NEXT:    bx lr
208entry:
209  %cmp = icmp sgt i32 %a, %b
210  %sub = sub i32 %a, %b
211  store i32 %sub, ptr @t
212  %ret = select i1 %cmp, double %x, double %y
213  ret double %ret
214}
215
; Stores b - a to @t and returns x when a > b (signed), otherwise y.
; The subtract operands are swapped relative to the compare operands. In both
; configurations the expected output keeps a separate cmp r1, r0 alongside a
; plain (non-flag-setting) sub. The armv7 run expects vmovlt.f64, and the
; armv8 run expects vselge.f64 with its two source operands exchanged.
216define double @double_sub_swap(i32 %a, i32 %b, double %x, double %y) {
217; CHECK-V7-LABEL: double_sub_swap:
218; CHECK-V7:       @ %bb.0: @ %entry
219; CHECK-V7-NEXT:    vmov d17, r2, r3
220; CHECK-V7-NEXT:    cmp r1, r0
221; CHECK-V7-NEXT:    vldr d16, [sp]
222; CHECK-V7-NEXT:    sub r0, r1, r0
223; CHECK-V7-NEXT:    vmovlt.f64 d16, d17
224; CHECK-V7-NEXT:    movw r1, :lower16:t
225; CHECK-V7-NEXT:    movt r1, :upper16:t
226; CHECK-V7-NEXT:    str r0, [r1]
227; CHECK-V7-NEXT:    vmov r2, r3, d16
228; CHECK-V7-NEXT:    mov r0, r2
229; CHECK-V7-NEXT:    mov r1, r3
230; CHECK-V7-NEXT:    bx lr
231;
232; CHECK-V8-LABEL: double_sub_swap:
233; CHECK-V8:       @ %bb.0: @ %entry
234; CHECK-V8-NEXT:    vldr d16, [sp]
235; CHECK-V8-NEXT:    cmp r1, r0
236; CHECK-V8-NEXT:    vmov d17, r2, r3
237; CHECK-V8-NEXT:    sub r0, r1, r0
238; CHECK-V8-NEXT:    vselge.f64 d16, d16, d17
239; CHECK-V8-NEXT:    movw r1, :lower16:t
240; CHECK-V8-NEXT:    vmov r2, r3, d16
241; CHECK-V8-NEXT:    movt r1, :upper16:t
242; CHECK-V8-NEXT:    str r0, [r1]
243; CHECK-V8-NEXT:    mov r0, r2
244; CHECK-V8-NEXT:    mov r1, r3
245; CHECK-V8-NEXT:    bx lr
246entry:
247  %cmp = icmp sgt i32 %a, %b
248  %sub = sub i32 %b, %a
249  %ret = select i1 %cmp, double %x, double %y
250  store i32 %sub, ptr @t
251  ret double %ret
252}
253
; External functions called from the if.then / if.else blocks of cmp_slt0 and
; cmp_ult0; every call site in this file is followed by unreachable.
254declare void @abort()
255declare void @exit(i32)
256
257; If the comparison uses the V bit (signed overflow/underflow), we can't
258; omit the comparison.
; Loads @t, subtracts 17, and calls abort when the result is negative
; (signed), otherwise calls exit(0). The four i32 parameters are not
; referenced in the body.
; The expected output keeps a plain sub followed by an explicit compare
; (cmn r0, #1) and a ble branch.
259define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) {
260; CHECK-LABEL: cmp_slt0:
261; CHECK:       @ %bb.0: @ %entry
262; CHECK-NEXT:    .save {r11, lr}
263; CHECK-NEXT:    push {r11, lr}
264; CHECK-NEXT:    movw r0, :lower16:t
265; CHECK-NEXT:    movt r0, :upper16:t
266; CHECK-NEXT:    ldr r0, [r0]
267; CHECK-NEXT:    sub r0, r0, #17
268; CHECK-NEXT:    cmn r0, #1
269; CHECK-NEXT:    ble .LBB11_2
270; CHECK-NEXT:  @ %bb.1: @ %if.else
271; CHECK-NEXT:    mov r0, #0
272; CHECK-NEXT:    bl exit
273; CHECK-NEXT:  .LBB11_2: @ %if.then
274; CHECK-NEXT:    bl abort
275entry:
276  %load = load i32, ptr @t, align 4
277  %sub = sub i32 %load, 17
278  %cmp = icmp slt i32 %sub, 0
279  br i1 %cmp, label %if.then, label %if.else
280
281if.then:
282  call void @abort()
283  unreachable
284
285if.else:
286  call void @exit(i32 0)
287  unreachable
288}
289
290; Same for the C bit. (Note the ult X, 0 is trivially
291; false, so the DAG combiner may or may not optimize it).
; Loads @t, subtracts 17, and branches on an unsigned less-than-zero compare
; of the result: if.then calls abort, if.else calls exit(0). The four i32
; parameters are not referenced in the body.
; The expected output keeps a plain sub followed by an explicit cmp r0, #0
; and a bhs branch.
292define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) {
293; CHECK-LABEL: cmp_ult0:
294; CHECK:       @ %bb.0: @ %entry
295; CHECK-NEXT:    .save {r11, lr}
296; CHECK-NEXT:    push {r11, lr}
297; CHECK-NEXT:    movw r0, :lower16:t
298; CHECK-NEXT:    movt r0, :upper16:t
299; CHECK-NEXT:    ldr r0, [r0]
300; CHECK-NEXT:    sub r0, r0, #17
301; CHECK-NEXT:    cmp r0, #0
302; CHECK-NEXT:    bhs .LBB12_2
303; CHECK-NEXT:  @ %bb.1: @ %if.then
304; CHECK-NEXT:    bl abort
305; CHECK-NEXT:  .LBB12_2: @ %if.else
306; CHECK-NEXT:    mov r0, #0
307; CHECK-NEXT:    bl exit
308entry:
309  %load = load i32, ptr @t, align 4
310  %sub = sub i32 %load, 17
311  %cmp = icmp ult i32 %sub, 0
312  br i1 %cmp, label %if.then, label %if.else
313
314if.then:
315  call void @abort()
316  unreachable
317
318if.else:
319  call void @exit(i32 0)
320  unreachable
321}
322
323