xref: /llvm-project/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll (revision 686f4599cfa444aa62db4e22bf752f3d9614c30d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=ARM %s
3; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=THUMB %s
4; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck -check-prefix=T2 %s
5; RUN: llc -mtriple=thumbv8-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=V8 %s
6
7; FIXME: The -mtriple=thumb test doesn't change if -disable-peephole is specified.
8
; Single-pointer record. @foo advances its cursor in units of this type and
; loads the pointer field of each element it visits.
9%struct.Foo = type { ptr }
10
; Loop that steps a cursor through consecutive %struct.Foo elements starting at
; %this. Each iteration loads a pointer, keeps its two low bits (value & 3),
; tests that result against zero and then reuses it as the switch operand.
; In all four RUN configurations the expected output branches (beq) directly on
; the flags produced by the ANDS, with no separate compare against zero between
; the two instructions.
11define ptr @foo(ptr %this, i32 %acc) nounwind readonly align 2 {
12; ARM-LABEL: foo:
13; ARM:       @ %bb.0: @ %entry
14; ARM-NEXT:    add r2, r0, #4
15; ARM-NEXT:    mov r12, #1
16; ARM-NEXT:    b .LBB0_3
17; ARM-NEXT:  .LBB0_1: @ %tailrecurse.switch
18; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
19; ARM-NEXT:    cmp r3, #1
20; ARM-NEXT:    movne pc, lr
21; ARM-NEXT:  .LBB0_2: @ %sw.bb
22; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
23; ARM-NEXT:    orr r1, r3, r1, lsl #1
24; ARM-NEXT:    add r2, r2, #4
25; ARM-NEXT:    add r12, r12, #1
26; ARM-NEXT:  .LBB0_3: @ %tailrecurse
27; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
28; ARM-NEXT:    ldr r3, [r2, #-4]
29; ARM-NEXT:    ands r3, r3, #3
30; ARM-NEXT:    beq .LBB0_2
31; ARM-NEXT:  @ %bb.4: @ %tailrecurse.switch
32; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
33; ARM-NEXT:    cmp r3, #3
34; ARM-NEXT:    moveq r0, r2
35; ARM-NEXT:    moveq pc, lr
36; ARM-NEXT:  .LBB0_5: @ %tailrecurse.switch
37; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
38; ARM-NEXT:    cmp r3, #2
39; ARM-NEXT:    bne .LBB0_1
40; ARM-NEXT:  @ %bb.6: @ %sw.bb8
41; ARM-NEXT:    add r1, r1, r12
42; ARM-NEXT:    add r0, r0, r1, lsl #2
43; ARM-NEXT:    mov pc, lr
44;
45; THUMB-LABEL: foo:
46; THUMB:       @ %bb.0: @ %entry
47; THUMB-NEXT:    .save {r4, r5, r7, lr}
48; THUMB-NEXT:    push {r4, r5, r7, lr}
49; THUMB-NEXT:    movs r2, #1
50; THUMB-NEXT:    movs r3, r0
51; THUMB-NEXT:  .LBB0_1: @ %tailrecurse
52; THUMB-NEXT:    @ =>This Inner Loop Header: Depth=1
53; THUMB-NEXT:    ldr r5, [r3]
54; THUMB-NEXT:    movs r4, #3
55; THUMB-NEXT:    ands r4, r5
56; THUMB-NEXT:    beq .LBB0_5
57; THUMB-NEXT:  @ %bb.2: @ %tailrecurse.switch
58; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
59; THUMB-NEXT:    cmp r4, #3
60; THUMB-NEXT:    beq .LBB0_6
61; THUMB-NEXT:  @ %bb.3: @ %tailrecurse.switch
62; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
63; THUMB-NEXT:    cmp r4, #2
64; THUMB-NEXT:    beq .LBB0_7
65; THUMB-NEXT:  @ %bb.4: @ %tailrecurse.switch
66; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
67; THUMB-NEXT:    cmp r4, #1
68; THUMB-NEXT:    bne .LBB0_9
69; THUMB-NEXT:  .LBB0_5: @ %sw.bb
70; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
71; THUMB-NEXT:    lsls r1, r1, #1
72; THUMB-NEXT:    orrs r4, r1
73; THUMB-NEXT:    adds r3, r3, #4
74; THUMB-NEXT:    adds r2, r2, #1
75; THUMB-NEXT:    movs r1, r4
76; THUMB-NEXT:    b .LBB0_1
77; THUMB-NEXT:  .LBB0_6: @ %sw.bb6
78; THUMB-NEXT:    adds r0, r3, #4
79; THUMB-NEXT:    b .LBB0_8
80; THUMB-NEXT:  .LBB0_7: @ %sw.bb8
81; THUMB-NEXT:    adds r1, r1, r2
82; THUMB-NEXT:    lsls r1, r1, #2
83; THUMB-NEXT:    adds r0, r0, r1
84; THUMB-NEXT:  .LBB0_8: @ %sw.bb6
85; THUMB-NEXT:    pop {r4, r5, r7}
86; THUMB-NEXT:    pop {r1}
87; THUMB-NEXT:    bx r1
88; THUMB-NEXT:  .LBB0_9: @ %sw.epilog
89; THUMB-NEXT:    pop {r4, r5, r7}
90; THUMB-NEXT:    pop {r0}
91; THUMB-NEXT:    bx r0
92;
93; T2-LABEL: foo:
94; T2:       @ %bb.0: @ %entry
95; T2-NEXT:    adds r2, r0, #4
96; T2-NEXT:    mov.w r12, #1
97; T2-NEXT:    b .LBB0_3
98; T2-NEXT:  .LBB0_1: @ %tailrecurse.switch
99; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
100; T2-NEXT:    cmp r3, #1
101; T2-NEXT:    it ne
102; T2-NEXT:    bxne lr
103; T2-NEXT:  .LBB0_2: @ %sw.bb
104; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
105; T2-NEXT:    orr.w r1, r3, r1, lsl #1
106; T2-NEXT:    adds r2, #4
107; T2-NEXT:    add.w r12, r12, #1
108; T2-NEXT:  .LBB0_3: @ %tailrecurse
109; T2-NEXT:    @ =>This Inner Loop Header: Depth=1
110; T2-NEXT:    ldr r3, [r2, #-4]
111; T2-NEXT:    ands r3, r3, #3
112; T2-NEXT:    beq .LBB0_2
113; T2-NEXT:  @ %bb.4: @ %tailrecurse.switch
114; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
115; T2-NEXT:    cmp r3, #3
116; T2-NEXT:    itt eq
117; T2-NEXT:    moveq r0, r2
118; T2-NEXT:    bxeq lr
119; T2-NEXT:  .LBB0_5: @ %tailrecurse.switch
120; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
121; T2-NEXT:    cmp r3, #2
122; T2-NEXT:    bne .LBB0_1
123; T2-NEXT:  @ %bb.6: @ %sw.bb8
124; T2-NEXT:    add r1, r12
125; T2-NEXT:    add.w r0, r0, r1, lsl #2
126; T2-NEXT:    bx lr
127;
128; V8-LABEL: foo:
129; V8:       @ %bb.0: @ %entry
130; V8-NEXT:    adds r2, r0, #4
131; V8-NEXT:    mov.w r12, #1
132; V8-NEXT:    b .LBB0_3
133; V8-NEXT:  .LBB0_1: @ %tailrecurse.switch
134; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
135; V8-NEXT:    cmp r3, #1
136; V8-NEXT:    it ne
137; V8-NEXT:    bxne lr
138; V8-NEXT:  .LBB0_2: @ %sw.bb
139; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
140; V8-NEXT:    orr.w r1, r3, r1, lsl #1
141; V8-NEXT:    adds r2, #4
142; V8-NEXT:    add.w r12, r12, #1
143; V8-NEXT:  .LBB0_3: @ %tailrecurse
144; V8-NEXT:    @ =>This Inner Loop Header: Depth=1
145; V8-NEXT:    ldr r3, [r2, #-4]
146; V8-NEXT:    ands r3, r3, #3
147; V8-NEXT:    beq .LBB0_2
148; V8-NEXT:  @ %bb.4: @ %tailrecurse.switch
149; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
150; V8-NEXT:    cmp r3, #3
151; V8-NEXT:    itt eq
152; V8-NEXT:    moveq r0, r2
153; V8-NEXT:    bxeq lr
154; V8-NEXT:  .LBB0_5: @ %tailrecurse.switch
155; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
156; V8-NEXT:    cmp r3, #2
157; V8-NEXT:    bne .LBB0_1
158; V8-NEXT:  @ %bb.6: @ %sw.bb8
159; V8-NEXT:    add r1, r12
160; V8-NEXT:    add.w r0, r0, r1, lsl #2
161; V8-NEXT:    bx lr
162entry:
  ; The loop cursor starts one %struct.Foo past %this; each iteration reads the
  ; pointer slot immediately before the cursor.
163  %scevgep = getelementptr %struct.Foo, ptr %this, i32 1
164  br label %tailrecurse
165
166tailrecurse:                                      ; preds = %sw.bb, %entry
  ; %lsr.iv2 = cursor, %lsr.iv = counter starting at 1, %acc.tr = running
  ; accumulator seeded from %acc.
167  %lsr.iv2 = phi ptr [ %scevgep3, %sw.bb ], [ %scevgep, %entry ]
168  %lsr.iv = phi i32 [ %lsr.iv.next, %sw.bb ], [ 1, %entry ]
169  %acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ]
170  %scevgep5 = getelementptr ptr, ptr %lsr.iv2, i32 -1
171  %tmp2 = load ptr, ptr %scevgep5
172  %0 = ptrtoint ptr %tmp2 to i32
173
174
175
176
  ; Keep only the two low bits of the loaded pointer. %and feeds the zero test
  ; right below, the switch in %tailrecurse.switch and the OR in %sw.bb, so the
  ; AND result itself must stay live after the flags are consumed.
177  %and = and i32 %0, 3
178  %tst = icmp eq i32 %and, 0
179  br i1 %tst, label %sw.bb, label %tailrecurse.switch
180
181tailrecurse.switch:                               ; preds = %tailrecurse
182
  ; Reached only when %and is nonzero. Values 1, 3 and 2 have explicit cases;
  ; anything else goes to %sw.epilog. Weights come from !1.
183  switch i32 %and, label %sw.epilog [
184    i32 1, label %sw.bb
185    i32 3, label %sw.bb6
186    i32 2, label %sw.bb8
187  ], !prof !1
188
189sw.bb:                                            ; preds = %tailrecurse.switch, %tailrecurse
  ; Loop latch: shift the accumulator left by one, OR in the masked bits, bump
  ; the counter and move the cursor forward by one %struct.Foo.
190  %shl = shl i32 %acc.tr, 1
191  %or = or i32 %and, %shl
192  %lsr.iv.next = add i32 %lsr.iv, 1
193  %scevgep3 = getelementptr %struct.Foo, ptr %lsr.iv2, i32 1
194  br label %tailrecurse
195
196sw.bb6:                                           ; preds = %tailrecurse.switch
  ; Low bits == 3: return the current cursor.
197  ret ptr %lsr.iv2
198
199sw.bb8:                                           ; preds = %tailrecurse.switch
  ; Low bits == 2: return the address of element (%acc.tr + %lsr.iv) of %this.
200  %tmp1 = add i32 %acc.tr, %lsr.iv
201  %add.ptr11 = getelementptr inbounds %struct.Foo, ptr %this, i32 %tmp1
202  ret ptr %add.ptr11
203
204sw.epilog:                                        ; preds = %tailrecurse.switch
  ; Default switch destination: the result is left undefined.
205  ret ptr undef
206}
207
208; Another test that exercises the AND/TST peephole optimization and also
209; generates a predicated ANDS instruction. Check that the predicate is printed
210; after the "S" modifier on the instruction.
211
; Record with a pointer followed by a one-element i8 array. @bar reads the
; single byte of that array (field 1, index 0) from both of its arguments.
212%struct.S = type { ptr, [1 x i8] }
213
; Returns 1 unless the bytes read from %x and %y both have a nonzero
; (value & 112) and at least one of those masked values equals 16; in that
; case the low 8 bits of the address %x are returned instead.
; The ARM-mode, Thumb2 and ARMv8 expectations contain a predicated
; flag-setting AND spelled "andsne", i.e. the condition code follows the "s".
214define internal zeroext i8 @bar(ptr %x, ptr nocapture %y) nounwind readonly {
215; ARM-LABEL: bar:
216; ARM:       @ %bb.0: @ %entry
217; ARM-NEXT:    ldrb r2, [r0, #4]
218; ARM-NEXT:    ands r2, r2, #112
219; ARM-NEXT:    ldrbne r1, [r1, #4]
220; ARM-NEXT:    andsne r1, r1, #112
221; ARM-NEXT:    beq .LBB1_2
222; ARM-NEXT:  @ %bb.1: @ %bb2
223; ARM-NEXT:    cmp r2, #16
224; ARM-NEXT:    cmpne r1, #16
225; ARM-NEXT:    andeq r0, r0, #255
226; ARM-NEXT:    moveq pc, lr
227; ARM-NEXT:  .LBB1_2: @ %return
228; ARM-NEXT:    mov r0, #1
229; ARM-NEXT:    mov pc, lr
230;
231; THUMB-LABEL: bar:
232; THUMB:       @ %bb.0: @ %entry
233; THUMB-NEXT:    ldrb r2, [r0, #4]
234; THUMB-NEXT:    movs r3, #112
235; THUMB-NEXT:    ands r2, r3
236; THUMB-NEXT:    beq .LBB1_4
237; THUMB-NEXT:  @ %bb.1: @ %bb
238; THUMB-NEXT:    ldrb r1, [r1, #4]
239; THUMB-NEXT:    ands r1, r3
240; THUMB-NEXT:    beq .LBB1_4
241; THUMB-NEXT:  @ %bb.2: @ %bb2
242; THUMB-NEXT:    cmp r2, #16
243; THUMB-NEXT:    beq .LBB1_5
244; THUMB-NEXT:  @ %bb.3: @ %bb2
245; THUMB-NEXT:    cmp r1, #16
246; THUMB-NEXT:    beq .LBB1_5
247; THUMB-NEXT:  .LBB1_4: @ %return
248; THUMB-NEXT:    movs r0, #1
249; THUMB-NEXT:    bx lr
250; THUMB-NEXT:  .LBB1_5: @ %bb4
251; THUMB-NEXT:    movs r1, #255
252; THUMB-NEXT:    ands r0, r1
253; THUMB-NEXT:    bx lr
254;
255; T2-LABEL: bar:
256; T2:       @ %bb.0: @ %entry
257; T2-NEXT:    ldrb r2, [r0, #4]
258; T2-NEXT:    ands r2, r2, #112
259; T2-NEXT:    itt ne
260; T2-NEXT:    ldrbne r1, [r1, #4]
261; T2-NEXT:    andsne r1, r1, #112
262; T2-NEXT:    beq .LBB1_2
263; T2-NEXT:  @ %bb.1: @ %bb2
264; T2-NEXT:    cmp r2, #16
265; T2-NEXT:    itee ne
266; T2-NEXT:    cmpne r1, #16
267; T2-NEXT:    uxtbeq r0, r0
268; T2-NEXT:    bxeq lr
269; T2-NEXT:  .LBB1_2: @ %return
270; T2-NEXT:    movs r0, #1
271; T2-NEXT:    bx lr
272;
273; V8-LABEL: bar:
274; V8:       @ %bb.0: @ %entry
275; V8-NEXT:    ldrb r2, [r0, #4]
276; V8-NEXT:    ands r2, r2, #112
277; V8-NEXT:    itt ne
278; V8-NEXT:    ldrbne r1, [r1, #4]
279; V8-NEXT:    andsne r1, r1, #112
280; V8-NEXT:    beq .LBB1_2
281; V8-NEXT:  @ %bb.1: @ %bb2
282; V8-NEXT:    cmp r2, #16
283; V8-NEXT:    itee ne
284; V8-NEXT:    cmpne r1, #16
285; V8-NEXT:    uxtbeq r0, r0
286; V8-NEXT:    bxeq lr
287; V8-NEXT:  .LBB1_2: @ %return
288; V8-NEXT:    movs r0, #1
289; V8-NEXT:    bx lr
290entry:
  ; First AND-then-test-for-zero pair: mask the byte from %x with 112 and bail
  ; out to %return when nothing is left.
291  %0 = getelementptr inbounds %struct.S, ptr %x, i32 0, i32 1, i32 0
292  %1 = load i8, ptr %0, align 1
293  %2 = zext i8 %1 to i32
294  %3 = and i32 %2, 112
295  %4 = icmp eq i32 %3, 0
296  br i1 %4, label %return, label %bb
297
298bb:                                               ; preds = %entry
  ; Second AND-then-test pair, same mask, on the byte from %y. It only runs
  ; when the first masked value was nonzero.
299  %5 = getelementptr inbounds %struct.S, ptr %y, i32 0, i32 1, i32 0
300  %6 = load i8, ptr %5, align 1
301  %7 = zext i8 %6 to i32
302  %8 = and i32 %7, 112
303  %9 = icmp eq i32 %8, 0
304  br i1 %9, label %return, label %bb2
305
306bb2:                                              ; preds = %bb
  ; Both masked values are reused here, so the AND results must be kept in
  ; registers in addition to setting the flags.
307  %10 = icmp eq i32 %3, 16
308  %11 = icmp eq i32 %8, 16
309  %or.cond = or i1 %10, %11
310  br i1 %or.cond, label %bb4, label %return
311
312bb4:                                              ; preds = %bb2
  ; Return the low byte of the pointer value %x.
313  %12 = ptrtoint ptr %x to i32
314  %phitmp = trunc i32 %12 to i8
315  ret i8 %phitmp
316
317return:                                           ; preds = %bb2, %bb, %entry
318  ret i8 1
319}
320
321
322; We were looking through multiple COPY instructions to find an AND we might
323; fold into a TST, but in doing so we changed the register being tested allowing
324; folding of unrelated tests (in this case, a TST against r1 was eliminated in
325; favour of an AND of r0).
326
; Computes (%a & 1) when bit 0 of %b is clear and (%a & 1) - 1 otherwise.
; Two ANDs with the same mask (1) act on different inputs: %and1 is a data
; value, %and2 only feeds the compare. Every configuration's expected output
; therefore keeps a flag-setting instruction on r1 (tst, or lsls #31) in
; addition to the AND that produces %and1.
327define i32 @test_tst_assessment(i32 %a, i32 %b) {
328; ARM-LABEL: test_tst_assessment:
329; ARM:       @ %bb.0:
330; ARM-NEXT:    and r0, r0, #1
331; ARM-NEXT:    tst r1, #1
332; ARM-NEXT:    subne r0, r0, #1
333; ARM-NEXT:    mov pc, lr
334;
335; THUMB-LABEL: test_tst_assessment:
336; THUMB:       @ %bb.0:
337; THUMB-NEXT:    movs r2, r0
338; THUMB-NEXT:    movs r0, #1
339; THUMB-NEXT:    ands r0, r2
340; THUMB-NEXT:    lsls r1, r1, #31
341; THUMB-NEXT:    beq .LBB2_2
342; THUMB-NEXT:  @ %bb.1:
343; THUMB-NEXT:    subs r0, r0, #1
344; THUMB-NEXT:  .LBB2_2:
345; THUMB-NEXT:    bx lr
346;
347; T2-LABEL: test_tst_assessment:
348; T2:       @ %bb.0:
349; T2-NEXT:    and r0, r0, #1
350; T2-NEXT:    lsls r1, r1, #31
351; T2-NEXT:    it ne
352; T2-NEXT:    subne r0, #1
353; T2-NEXT:    bx lr
354;
355; V8-LABEL: test_tst_assessment:
356; V8:       @ %bb.0:
357; V8-NEXT:    and r0, r0, #1
358; V8-NEXT:    lsls r1, r1, #31
359; V8-NEXT:    it ne
360; V8-NEXT:    subne r0, #1
361; V8-NEXT:    bx lr
  ; Data path: low bit of %a and that value minus one.
362  %and1 = and i32 %a, 1
363  %sub = sub i32 %and1, 1
  ; Condition path: low bit of %b, used only by the compare.
364  %and2 = and i32 %b, 1
365  %cmp = icmp eq i32 %and2, 0
366  %sel = select i1 %cmp, i32 %and1, i32 %sub
367  ret i32 %sel
368}
369
; Profile data attached to the switch in @foo via !prof: four weights for a
; switch with a default destination plus three cases. Under LLVM's
; branch-weight convention the first weight is presumably the default's,
; followed by one per case in listed order; confirm against the LangRef
; before editing.
370!1 = !{!"branch_weights", i32 1, i32 1, i32 3, i32 2 }
371