xref: /llvm-project/llvm/test/CodeGen/AArch64/sink-and-fold.ll (revision c83f23d6abb6f8d693c643bc1b43f9b9e06bc537)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s | FileCheck %s
3target triple = "aarch64-linux"
4
; External variadic callee shared by the tests below. Passing an address to it
; gives that address a use that needs the value in an argument register, as
; opposed to a load/store that can take it as an addressing-mode operand.
5declare i32 @use(...)
6
; f0: constant-offset address. %a = %p + 2 x i32 (8 bytes) is defined in %entry
; and used by a call in %if.then and by a load in %if.else. The CHECK lines
; expect no add ahead of the branch: the call path materialises the address
; with `add x0, x1, #8`, and the load path uses it as the immediate offset of
; `ldur w0, [x1, #8]`.
7define i32 @f0(i1 %c1, ptr %p) nounwind {
8; CHECK-LABEL: f0:
9; CHECK:       // %bb.0: // %entry
10; CHECK-NEXT:    tbz w0, #0, .LBB0_2
11; CHECK-NEXT:  // %bb.1: // %if.then
12; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
13; CHECK-NEXT:    add x0, x1, #8
14; CHECK-NEXT:    bl use
15; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
16; CHECK-NEXT:    ret
17; CHECK-NEXT:  .LBB0_2: // %if.else
18; CHECK-NEXT:    ldur w0, [x1, #8]
19; CHECK-NEXT:    ret
20entry:
21  %a = getelementptr i32, ptr %p, i32 2
22  br i1 %c1, label %if.then, label %if.else
23
24if.then:
25  %v0 = call i32 @use(ptr %a)
26  br label %exit
27
28if.else:
29  %v1 = load i32, ptr %a
30  br label %exit
31
32exit:
33  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
34  ret i32 %v
35}
36
; f1: register-offset address. %a = %p + %i bytes (i8 GEP, so the index is not
; scaled) is defined in %entry and used by a call in %if.then and by a load in
; %if.else. The CHECK lines expect `add x0, x1, x2` on the call path only, and
; the register-offset form `ldr w0, [x1, x2]` on the load path.
37define i32 @f1(i1 %c1, ptr %p, i64 %i) nounwind {
38; CHECK-LABEL: f1:
39; CHECK:       // %bb.0: // %entry
40; CHECK-NEXT:    tbz w0, #0, .LBB1_2
41; CHECK-NEXT:  // %bb.1: // %if.then
42; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
43; CHECK-NEXT:    add x0, x1, x2
44; CHECK-NEXT:    bl use
45; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
46; CHECK-NEXT:    ret
47; CHECK-NEXT:  .LBB1_2: // %if.else
48; CHECK-NEXT:    ldr w0, [x1, x2]
49; CHECK-NEXT:    ret
50entry:
51  %a = getelementptr i8, ptr %p, i64 %i
52  br i1 %c1, label %if.then, label %if.else
53
54if.then:
55  %v0 = call i32 @use(ptr %a)
56  br label %exit
57
58if.else:
59  %v1 = load i32, ptr %a
60  br label %exit
61
62exit:
63  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
64  ret i32 %v
65}
66
67; Address calculation too slow.
; %S is 32 bytes (one i32 plus [7 x i32]), so indexing by %i scales it by 32.
; Both uses of %a are call arguments. The CHECK lines expect the scaled add
; `add x1, x1, x2, lsl #5` to stay in the entry block, ahead of the branch,
; with both call paths consuming the result from x1.
; NOTE(review): presumably a shift of 5 is outside what +alu-lsl-fast treats as
; cheap, which is why the add is not moved - confirm against the AArch64
; subtarget feature definition.
68%S = type {i32, [7 x i32] }
69define i32 @f2(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast" {
70; CHECK-LABEL: f2:
71; CHECK:       // %bb.0: // %entry
72; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
73; CHECK-NEXT:    add x1, x1, x2, lsl #5
74; CHECK-NEXT:    tbz w0, #0, .LBB2_2
75; CHECK-NEXT:  // %bb.1: // %if.then
76; CHECK-NEXT:    mov x0, x1
77; CHECK-NEXT:    bl use
78; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
79; CHECK-NEXT:    ret
80; CHECK-NEXT:  .LBB2_2: // %if.else
81; CHECK-NEXT:    mov w0, #1 // =0x1
82; CHECK-NEXT:    bl use
83; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
84; CHECK-NEXT:    ret
85entry:
86  %a = getelementptr %S, ptr %p, i64 %i
87  br i1 %c1, label %if.then, label %if.else
88
89if.then:
90  %v0 = call i32 @use(ptr %a)
91  br label %exit
92
93if.else:
; %a is the second argument here, so it is expected in x1, where the entry
; block's add already left it.
94  %v1 = call i32 @use(i32 1, ptr %a)
95  br label %exit
96
97exit:
98  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
99  ret i32 %v
100}
101
102; Address calculation cheap enough on some cores.
; With +alu-lsl-fast and an i32-scaled index (shift of 2), the CHECK lines
; expect no add in the entry block: the call path materialises the address with
; `add x0, x1, x2, lsl #2`, and the load path uses the scaled register-offset
; form `ldr w0, [x1, x2, lsl #2]`.
103define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind  "target-features"="+alu-lsl-fast" {
104; CHECK-LABEL: f3:
105; CHECK:       // %bb.0: // %entry
106; CHECK-NEXT:    tbz w0, #0, .LBB3_2
107; CHECK-NEXT:  // %bb.1: // %if.then
108; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
109; CHECK-NEXT:    add x0, x1, x2, lsl #2
110; CHECK-NEXT:    bl use
111; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
112; CHECK-NEXT:    ret
113; CHECK-NEXT:  .LBB3_2: // %if.else
114; CHECK-NEXT:    ldr w0, [x1, x2, lsl #2]
115; CHECK-NEXT:    ret
116entry:
117  %a = getelementptr i32, ptr %p, i64 %i
118  br i1 %c1, label %if.then, label %if.else
119
120if.then:
121  %v0 = call i32 @use(ptr %a)
122  br label %exit
123
124if.else:
125  %v1 = load i32, ptr %a
126  br label %exit
127
128exit:
129  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
130  ret i32 %v
131}
132
; f4: address defined in an outer loop and used in an inner loop. %ai.ptr
; (&a[i]) is computed in the outer header %LI and used inside the inner loop
; %LJ by a call (%if.then) and by a load (%if.else). The CHECK lines expect
; %LI to contain no address computation; instead the inner loop recomputes it
; at each use: `add x0, x20, x22, lsl #2` before the call and
; `ldr w0, [x20, x22, lsl #2]` for the load.
; Both latches compare the pre-increment counter (%j, %i) with %n, which is
; why the expected inner latch recovers the old %j with `sub x9, x8, #1`.
133define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
134; CHECK-LABEL: f4:
135; CHECK:       // %bb.0: // %entry
136; CHECK-NEXT:    cmp x1, #1
137; CHECK-NEXT:    b.lt .LBB4_9
138; CHECK-NEXT:  // %bb.1: // %LI.preheader
139; CHECK-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
140; CHECK-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
141; CHECK-NEXT:    mov x22, xzr
142; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
143; CHECK-NEXT:    mov x19, x1
144; CHECK-NEXT:    mov x20, x0
145; CHECK-NEXT:    b .LBB4_3
146; CHECK-NEXT:  .LBB4_2: // %LI.latch
147; CHECK-NEXT:    // in Loop: Header=BB4_3 Depth=1
148; CHECK-NEXT:    cmp x22, x19
149; CHECK-NEXT:    mov x22, x23
150; CHECK-NEXT:    b.ge .LBB4_8
151; CHECK-NEXT:  .LBB4_3: // %LI
152; CHECK-NEXT:    // =>This Loop Header: Depth=1
153; CHECK-NEXT:    // Child Loop BB4_6 Depth 2
154; CHECK-NEXT:    mov x21, xzr
155; CHECK-NEXT:    add x23, x22, #1
156; CHECK-NEXT:    b .LBB4_6
157; CHECK-NEXT:  .LBB4_4: // %if.else
158; CHECK-NEXT:    // in Loop: Header=BB4_6 Depth=2
159; CHECK-NEXT:    ldr w0, [x20, x22, lsl #2]
160; CHECK-NEXT:  .LBB4_5: // %LJ.latch
161; CHECK-NEXT:    // in Loop: Header=BB4_6 Depth=2
162; CHECK-NEXT:    add x8, x21, #1
163; CHECK-NEXT:    str w0, [x20, x21, lsl #2]
164; CHECK-NEXT:    sub x9, x8, #1
165; CHECK-NEXT:    mov x21, x8
166; CHECK-NEXT:    cmp x9, x19
167; CHECK-NEXT:    b.ge .LBB4_2
168; CHECK-NEXT:  .LBB4_6: // %LJ
169; CHECK-NEXT:    // Parent Loop BB4_3 Depth=1
170; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
171; CHECK-NEXT:    ldr w8, [x20, x21, lsl #2]
172; CHECK-NEXT:    tbz w8, #31, .LBB4_4
173; CHECK-NEXT:  // %bb.7: // %if.then
174; CHECK-NEXT:    // in Loop: Header=BB4_6 Depth=2
175; CHECK-NEXT:    add x0, x20, x22, lsl #2
176; CHECK-NEXT:    mov x1, x21
177; CHECK-NEXT:    bl use
178; CHECK-NEXT:    b .LBB4_5
179; CHECK-NEXT:  .LBB4_8:
180; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
181; CHECK-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
182; CHECK-NEXT:    ldp x30, x23, [sp], #48 // 16-byte Folded Reload
183; CHECK-NEXT:  .LBB4_9: // %exit
184; CHECK-NEXT:    ret
185entry:
186    %c0 = icmp slt i64 %n, 1
187    br i1 %c0, label %exit, label %LI
188
189LI:
190    %i = phi i64 [0, %entry], [%i.next, %LI.latch]
191    %i.next = add i64 %i, 1
; Defined once per outer iteration; every use is inside the inner loop.
192    %ai.ptr = getelementptr i32, ptr %a, i64 %i
193    br label %LJ
194
195LJ:
196    %j = phi i64 [0, %LI], [%j.next, %LJ.latch]
197    %j.next = add i64 %j, 1
198    %aj.ptr = getelementptr i32, ptr %a, i64 %j
199    %aj = load i32, ptr %aj.ptr
200    %c1 = icmp slt i32 %aj, 0
201    br i1 %c1, label %if.then, label %if.else
202
203if.then:
204    %v = call i32 @use(ptr %ai.ptr, i64 %j)
205    store i32 %v, ptr %aj.ptr
206    br label %LJ.latch
207
208if.else:
209    %ai = load i32, ptr %ai.ptr
210    store i32 %ai, ptr %aj.ptr
211    br label %LJ.latch
212
213LJ.latch:
214    %c2 = icmp slt i64 %j, %n
215    br i1 %c2, label %LJ, label %LI.latch
216
217LI.latch:
218    %c3 = icmp slt i64 %i, %n
219    br i1 %c3, label %LI, label %exit
220
221exit:
222    ret void
223}
224
; %T is three i32 fields, i.e. 12 bytes per element.
225%T = type { i32, i32, i32 }
226
; f5: struct-field address defined before a loop and used inside it.
; %p = &a[k] field 1 is computed in %entry; inside loop %L it is used by a call
; (%if.then) and by a load (%if.else). The CHECK lines expect the address to be
; split: the scaled base a + k*12 is formed once in %L.preheader
; (`mov w8, #12` / `smaddl x20, w2, w8, x0`), while the +4 field offset is
; applied at each use inside the loop: `add x0, x20, #4` before the call and
; `ldur w0, [x20, #4]` for the load.
227define void @f5(ptr %a, i32 %n, i32 %k) nounwind {
228; CHECK-LABEL: f5:
229; CHECK:       // %bb.0: // %entry
230; CHECK-NEXT:    cmp w1, #1
231; CHECK-NEXT:    b.lt .LBB5_7
232; CHECK-NEXT:  // %bb.1: // %L.preheader
233; CHECK-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
234; CHECK-NEXT:    mov w8, #12 // =0xc
235; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
236; CHECK-NEXT:    mov w19, w1
237; CHECK-NEXT:    smaddl x20, w2, w8, x0
238; CHECK-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
239; CHECK-NEXT:    add x21, x0, #8
240; CHECK-NEXT:    mov w22, #-1 // =0xffffffff
241; CHECK-NEXT:    b .LBB5_4
242; CHECK-NEXT:  .LBB5_2: // %if.else
243; CHECK-NEXT:    // in Loop: Header=BB5_4 Depth=1
244; CHECK-NEXT:    ldur w0, [x20, #4]
245; CHECK-NEXT:  .LBB5_3: // %L.latch
246; CHECK-NEXT:    // in Loop: Header=BB5_4 Depth=1
247; CHECK-NEXT:    add w22, w22, #1
248; CHECK-NEXT:    str w0, [x21], #12
249; CHECK-NEXT:    cmp w22, w19
250; CHECK-NEXT:    b.ge .LBB5_6
251; CHECK-NEXT:  .LBB5_4: // %L
252; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
253; CHECK-NEXT:    ldr w8, [x21]
254; CHECK-NEXT:    tbz w8, #31, .LBB5_2
255; CHECK-NEXT:  // %bb.5: // %if.then
256; CHECK-NEXT:    // in Loop: Header=BB5_4 Depth=1
257; CHECK-NEXT:    add x0, x20, #4
258; CHECK-NEXT:    add w1, w22, #1
259; CHECK-NEXT:    bl use
260; CHECK-NEXT:    b .LBB5_3
261; CHECK-NEXT:  .LBB5_6:
262; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
263; CHECK-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
264; CHECK-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
265; CHECK-NEXT:  .LBB5_7: // %exit
266; CHECK-NEXT:    ret
267entry:
; Loop-invariant: depends only on the arguments %a and %k.
268    %p = getelementptr %T, ptr %a, i32 %k, i32 1
269    %c0 = icmp slt i32 %n, 1
270    br i1 %c0, label %exit, label %L
271
272L:
273    %i = phi i32 [0, %entry], [%i.next, %L.latch]
274    %i.next = add i32 %i, 1
; Field 2 of element %i; this address is loaded here and stored to in the latch.
275    %ai.ptr = getelementptr %T, ptr %a, i32 %i, i32 2
276    %ai = load i32, ptr %ai.ptr
277    %c1 = icmp slt i32 %ai, 0
278    br i1 %c1, label %if.then, label %if.else
279
280if.then:
281    %u.0 = call i32 @use(ptr %p, i32 %i)
282    br label %L.latch
283
284if.else:
285    %u.1 = load i32, ptr %p
286    br label %L.latch
287
288L.latch:
289    %u = phi i32 [%u.0, %if.then], [%u.1, %if.else]
290    store i32 %u, ptr %ai.ptr
291    %c2 = icmp slt i32 %i, %n
292    br i1 %c2, label %L, label %exit
293
294exit:
295    ret void
296}
297
; f6: sign-extended 32-bit index. The sext is in %entry, and each arm has its
; own i32 GEP feeding a store (%if.then) or a load (%if.else). The CHECK lines
; expect no separate extend or add instruction: both memory accesses use the
; extended-register form `[x1, w2, sxtw #2]`.
298define i32 @f6(i1 %c, ptr %a, i32 %i) {
299; CHECK-LABEL: f6:
300; CHECK:       // %bb.0: // %entry
301; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
302; CHECK-NEXT:    tbz w0, #0, .LBB6_2
303; CHECK-NEXT:  // %bb.1: // %if.then
304; CHECK-NEXT:    mov w0, wzr
305; CHECK-NEXT:    str wzr, [x1, w2, sxtw #2]
306; CHECK-NEXT:    ret
307; CHECK-NEXT:  .LBB6_2: // %if.else
308; CHECK-NEXT:    ldr w0, [x1, w2, sxtw #2]
309; CHECK-NEXT:    ret
310entry:
311    %j = sext i32 %i to i64
312    br i1 %c, label %if.then, label %if.else
313
314if.then:
315    %p0 = getelementptr i32, ptr %a, i64 %j
316    store i32 0, ptr %p0
317    br label %exit
318
319if.else:
320    %p1 = getelementptr i32, ptr %a, i64 %j
321    %v0 = load i32, ptr %p1
322    br label %exit
323
324exit:
325    %v = phi i32 [0, %if.then], [%v0, %if.else]
326    ret i32 %v
327}
328
; f7: zero-extended 32-bit index with byte accesses. The zext is in %entry, and
; each arm has its own i8 GEP feeding a store (%if.then) or a load (%if.else).
; The CHECK lines expect both accesses to use the unscaled extended-register
; form `[x1, w2, uxtw]` (strb / ldrb), with no separate extend or add.
329define i8 @f7(i1 %c, ptr %a, i32 %i) {
330; CHECK-LABEL: f7:
331; CHECK:       // %bb.0: // %entry
332; CHECK-NEXT:    tbz w0, #0, .LBB7_2
333; CHECK-NEXT:  // %bb.1: // %if.then
334; CHECK-NEXT:    mov w0, wzr
335; CHECK-NEXT:    strb wzr, [x1, w2, uxtw]
336; CHECK-NEXT:    ret
337; CHECK-NEXT:  .LBB7_2: // %if.else
338; CHECK-NEXT:    ldrb w0, [x1, w2, uxtw]
339; CHECK-NEXT:    ret
340entry:
341    %j = zext i32 %i to i64
342    br i1 %c, label %if.then, label %if.else
343
344if.then:
345    %p0 = getelementptr i8, ptr %a, i64 %j
346    store i8 0, ptr %p0
347    br label %exit
348
349if.else:
350    %p1 = getelementptr i8, ptr %a, i64 %j
351    %v0 = load i8, ptr %p1
352    br label %exit
353
354exit:
355    %v = phi i8 [0, %if.then], [%v0, %if.else]
356    ret i8 %v
357}
358
; f8: i32 GEP index with no explicit extension in the IR. A single GEP in
; %entry is shared by the store in %if.then and the load in %if.else. The CHECK
; lines expect the index to be treated as sign-extended and folded into both
; accesses as `[x1, w2, sxtw #2]`, leaving no address arithmetic in %entry.
359define i32 @f8(i1 %c, ptr %a, i32 %i) {
360; CHECK-LABEL: f8:
361; CHECK:       // %bb.0: // %entry
362; CHECK-NEXT:    tbz w0, #0, .LBB8_2
363; CHECK-NEXT:  // %bb.1: // %if.then
364; CHECK-NEXT:    mov w0, wzr
365; CHECK-NEXT:    str wzr, [x1, w2, sxtw #2]
366; CHECK-NEXT:    ret
367; CHECK-NEXT:  .LBB8_2: // %if.else
368; CHECK-NEXT:    ldr w0, [x1, w2, sxtw #2]
369; CHECK-NEXT:    ret
370entry:
371    %p = getelementptr i32, ptr %a, i32 %i
372    br i1 %c, label %if.then, label %if.else
373
374if.then:
375    store i32 0, ptr %p
376    br label %exit
377
378if.else:
379    %v0 = load i32, ptr %p
380    br label %exit
381
382exit:
383    %v = phi i32 [0, %if.then], [%v0, %if.else]
384    ret i32 %v
385}
386
; f9: zero-extended 32-bit index with 64-bit accesses. Both the zext and the
; i64 GEP are in %entry, and the resulting pointer is shared by the store in
; %if.then and the load in %if.else. The CHECK lines expect both accesses to
; use `[x1, w2, uxtw #3]`, leaving no extend or add in %entry.
387define i64 @f9(i1 %c, ptr %a, i32 %i) {
388; CHECK-LABEL: f9:
389; CHECK:       // %bb.0: // %entry
390; CHECK-NEXT:    tbz w0, #0, .LBB9_2
391; CHECK-NEXT:  // %bb.1: // %if.then
392; CHECK-NEXT:    mov x0, xzr
393; CHECK-NEXT:    str xzr, [x1, w2, uxtw #3]
394; CHECK-NEXT:    ret
395; CHECK-NEXT:  .LBB9_2: // %if.else
396; CHECK-NEXT:    ldr x0, [x1, w2, uxtw #3]
397; CHECK-NEXT:    ret
398entry:
399    %j = zext i32 %i to i64
400    %p = getelementptr i64, ptr %a, i64 %j
401    br i1 %c, label %if.then, label %if.else
402
403if.then:
404    store i64 0, ptr %p
405    br label %exit
406
407if.else:
408    %v0 = load i64, ptr %p
409    br label %exit
410
411exit:
412    %v = phi i64 [0, %if.then], [%v0, %if.else]
413    ret i64 %v
414}
415