xref: /llvm-project/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64 -sink-insts-to-avoid-spills | FileCheck %s
3
; Module-level data layout string, given explicitly rather than left to the
; triple passed on the llc command line.
4target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5
; External array of 100 x i32. The functions visible in this file load from
; `ptr @A` directly (i.e. the first element) in their loop preheaders.
6@A = external dso_local global [100 x i32], align 4
7
; sink_load_and_copy: @A is loaded once in for.body.preheader and that value is
; passed to @_Z3usei on every iteration of for.body. The running quotient
; starts at %n and is divided by each call result; the loop runs while the
; down-counter (also starting at %n) is non-zero, and is skipped when %n <= 0.
;
; The assertions below expect the `ldr` from A to be emitted in the preheader
; (%bb.1) and the loop to only copy the loaded value into w0 before each `bl`.
; NOTE(review): the function name suggests sinking, but the expected output
; keeps the load outside the loop — confirm this is the intended baseline.
8define i32 @sink_load_and_copy(i32 %n) {
9; CHECK-LABEL: sink_load_and_copy:
10; CHECK:       // %bb.0: // %entry
11; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
12; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
13; CHECK-NEXT:    .cfi_def_cfa_offset 32
14; CHECK-NEXT:    .cfi_offset w19, -8
15; CHECK-NEXT:    .cfi_offset w20, -16
16; CHECK-NEXT:    .cfi_offset w21, -24
17; CHECK-NEXT:    .cfi_offset w30, -32
18; CHECK-NEXT:    mov w19, w0
19; CHECK-NEXT:    cmp w0, #1
20; CHECK-NEXT:    b.lt .LBB0_3
21; CHECK-NEXT:  // %bb.1: // %for.body.preheader
22; CHECK-NEXT:    adrp x8, A
23; CHECK-NEXT:    mov w20, w19
24; CHECK-NEXT:    ldr w21, [x8, :lo12:A]
25; CHECK-NEXT:  .LBB0_2: // %for.body
26; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
27; CHECK-NEXT:    mov w0, w21
28; CHECK-NEXT:    bl _Z3usei
29; CHECK-NEXT:    sdiv w20, w20, w0
30; CHECK-NEXT:    subs w19, w19, #1
31; CHECK-NEXT:    b.ne .LBB0_2
32; CHECK-NEXT:    b .LBB0_4
33; CHECK-NEXT:  .LBB0_3:
34; CHECK-NEXT:    mov w20, w19
35; CHECK-NEXT:  .LBB0_4: // %for.cond.cleanup
36; CHECK-NEXT:    mov w0, w20
37; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
38; CHECK-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
39; CHECK-NEXT:    ret
40entry:
  ; Enter the loop only when %n > 0; otherwise return %n unchanged.
41  %cmp63 = icmp sgt i32 %n, 0
42  br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
43
44for.body.preheader:
  ; Loop-invariant load; its only user is the call argument in for.body.
45  %0 = load i32, ptr @A, align 4
46  br label %for.body
47
48for.cond.cleanup:
  ; %n when the loop was skipped, otherwise the last quotient computed.
49  %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
50  ret i32 %sum.0.lcssa
51
52for.body:
  ; %lsr.iv is a down-counter from %n; %sum.065 carries the running quotient.
53  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
54  %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
55  %call = tail call i32 @_Z3usei(i32 %0)
56  %div = sdiv i32 %sum.065, %call
57  %lsr.iv.next = add i32 %lsr.iv, -1
58  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
59  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
60}
61
; cant_sink_successive_call: the preheader loads @A and then calls
; @_Z3usei(%n) (result unused) before entering a loop that passes the loaded
; value to @_Z3usei each iteration and divides a running quotient (initially
; %n) by the result. The loop is skipped when %n <= 0.
;
; The assertions below expect the `ldr` from A to stay in the preheader, ahead
; of the preheader's `bl`, with the loop body only copying the value into w0.
; Presumably the intervening call is what must block sinking the load (per the
; function name) — the pass's reasoning is not visible in this file.
62define i32 @cant_sink_successive_call(i32 %n) {
63; CHECK-LABEL: cant_sink_successive_call:
64; CHECK:       // %bb.0: // %entry
65; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
66; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
67; CHECK-NEXT:    .cfi_def_cfa_offset 32
68; CHECK-NEXT:    .cfi_offset w19, -8
69; CHECK-NEXT:    .cfi_offset w20, -16
70; CHECK-NEXT:    .cfi_offset w21, -24
71; CHECK-NEXT:    .cfi_offset w30, -32
72; CHECK-NEXT:    mov w19, w0
73; CHECK-NEXT:    cmp w0, #1
74; CHECK-NEXT:    b.lt .LBB1_3
75; CHECK-NEXT:  // %bb.1: // %for.body.preheader
76; CHECK-NEXT:    adrp x8, A
77; CHECK-NEXT:    mov w0, w19
78; CHECK-NEXT:    ldr w20, [x8, :lo12:A]
79; CHECK-NEXT:    bl _Z3usei
80; CHECK-NEXT:    mov w21, w19
81; CHECK-NEXT:  .LBB1_2: // %for.body
82; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
83; CHECK-NEXT:    mov w0, w20
84; CHECK-NEXT:    bl _Z3usei
85; CHECK-NEXT:    sdiv w21, w21, w0
86; CHECK-NEXT:    subs w19, w19, #1
87; CHECK-NEXT:    b.ne .LBB1_2
88; CHECK-NEXT:    b .LBB1_4
89; CHECK-NEXT:  .LBB1_3:
90; CHECK-NEXT:    mov w21, w19
91; CHECK-NEXT:  .LBB1_4: // %for.cond.cleanup
92; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
93; CHECK-NEXT:    mov w0, w21
94; CHECK-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
95; CHECK-NEXT:    ret
96entry:
  ; Enter the loop only when %n > 0; otherwise return %n unchanged.
97  %cmp63 = icmp sgt i32 %n, 0
98  br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
99
100for.body.preheader:
  ; The load is followed by a call in the same block; %call0 has no users.
101  %0 = load i32, ptr @A, align 4
102  %call0 = tail call i32 @_Z3usei(i32 %n)
103  br label %for.body
104
105for.cond.cleanup:
  ; %n when the loop was skipped, otherwise the last quotient computed.
106  %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
107  ret i32 %sum.0.lcssa
108
109for.body:
  ; %lsr.iv is a down-counter from %n; %sum.065 carries the running quotient.
110  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
111  %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
112  %call = tail call i32 @_Z3usei(i32 %0)
113  %div = sdiv i32 %sum.065, %call
114  %lsr.iv.next = add i32 %lsr.iv, -1
115  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
116  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
117}
118
; cant_sink_successive_store: the preheader loads @A and then stores the
; constant 42 through %store before entering a loop that passes the loaded
; value to @_Z3usei each iteration and divides a running quotient (initially
; %n) by the result. The loop is skipped when %n <= 0.
;
; The assertions below expect the `ldr` from A to stay in the preheader, ahead
; of the `str` of 42, with the loop body only copying the value into w0.
; Presumably the intervening store is what must block sinking the load (per
; the function name) — the pass's reasoning is not visible in this file.
;
; NOTE(review): %store is declared `readnone`, yet the preheader writes through
; it. That looks inconsistent with the attribute's documented meaning; confirm
; whether it is deliberate before relying on it. Changing the attribute may
; require regenerating the assertions.
119define i32 @cant_sink_successive_store(ptr nocapture readnone %store, i32 %n) {
120; CHECK-LABEL: cant_sink_successive_store:
121; CHECK:       // %bb.0: // %entry
122; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
123; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
124; CHECK-NEXT:    .cfi_def_cfa_offset 32
125; CHECK-NEXT:    .cfi_offset w19, -8
126; CHECK-NEXT:    .cfi_offset w20, -16
127; CHECK-NEXT:    .cfi_offset w21, -24
128; CHECK-NEXT:    .cfi_offset w30, -32
129; CHECK-NEXT:    mov w19, w1
130; CHECK-NEXT:    cmp w1, #1
131; CHECK-NEXT:    b.lt .LBB2_3
132; CHECK-NEXT:  // %bb.1: // %for.body.preheader
133; CHECK-NEXT:    adrp x8, A
134; CHECK-NEXT:    mov w21, w19
135; CHECK-NEXT:    ldr w20, [x8, :lo12:A]
136; CHECK-NEXT:    mov w8, #42 // =0x2a
137; CHECK-NEXT:    str w8, [x0]
138; CHECK-NEXT:  .LBB2_2: // %for.body
139; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
140; CHECK-NEXT:    mov w0, w20
141; CHECK-NEXT:    bl _Z3usei
142; CHECK-NEXT:    sdiv w21, w21, w0
143; CHECK-NEXT:    subs w19, w19, #1
144; CHECK-NEXT:    b.ne .LBB2_2
145; CHECK-NEXT:    b .LBB2_4
146; CHECK-NEXT:  .LBB2_3:
147; CHECK-NEXT:    mov w21, w19
148; CHECK-NEXT:  .LBB2_4: // %for.cond.cleanup
149; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
150; CHECK-NEXT:    mov w0, w21
151; CHECK-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
152; CHECK-NEXT:    ret
153entry:
  ; Enter the loop only when %n > 0; otherwise return %n unchanged.
154  %cmp63 = icmp sgt i32 %n, 0
155  br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
156
157for.body.preheader:
  ; The load is followed by a store through the caller-supplied pointer in the
  ; same block.
158  %0 = load i32, ptr @A, align 4
159  store i32 42, ptr %store, align 4
160  br label %for.body
161
162for.cond.cleanup:
  ; %n when the loop was skipped, otherwise the last quotient computed.
163  %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
164  ret i32 %sum.0.lcssa
165
166for.body:
  ; %lsr.iv is a down-counter from %n; %sum.065 carries the running quotient.
167  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
168  %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
169  %call = tail call i32 @_Z3usei(i32 %0)
170  %div = sdiv i32 %sum.065, %call
171  %lsr.iv.next = add i32 %lsr.iv, -1
172  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
173  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
174}
175
; External callee invoked by the test functions above; takes and returns i32.
; Presumably the Itanium-mangled name of `use(int)` — its definition is not
; part of this file, so it is an opaque call as far as this file shows.
176declare i32 @_Z3usei(i32)
177