xref: /llvm-project/llvm/test/CodeGen/ARM/v8m-tail-call.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc %s -o - -mtriple=thumbv8m.base | FileCheck %s
3
4declare i32 @g(...)
5
6declare i32 @h0(i32, i32, i32, i32)
; f0: calls @g, then tail-calls @h0 with g's result followed by the constants
; 1, 2 and 3. The expected output loads r1-r3 with the constants, reloads the
; saved lr from the stack through r7 (ldr r7 / mov lr, r7 / pop {r7} /
; add sp, #4) and finishes with a plain branch `b h0` instead of `bl`.
; NOTE(review): r7 is presumably used for the lr reload because every
; argument register is occupied at that point -- confirm against the Thumb1
; frame lowering code.
7define hidden i32 @f0() {
8; CHECK-LABEL: f0:
9; CHECK:       @ %bb.0:
10; CHECK-NEXT:    push {r7, lr}
11; CHECK-NEXT:    bl g
12; CHECK-NEXT:    movs r1, #1
13; CHECK-NEXT:    movs r2, #2
14; CHECK-NEXT:    movs r3, #3
15; CHECK-NEXT:    ldr r7, [sp, #4]
16; CHECK-NEXT:    mov lr, r7
17; CHECK-NEXT:    pop {r7}
18; CHECK-NEXT:    add sp, #4
19; CHECK-NEXT:    b h0
20  %1 = tail call i32 @g()
21  %2 = tail call i32 @h0(i32 %1, i32 1, i32 2, i32 3)
22  ret i32 %2
23}
24
25declare i32 @h1(i32)
; f1: calls @g and tail-calls @h1 with g's result as the only argument.
; The expected epilogue pops r7, pops the saved lr into r1, moves it into lr
; and then branches with `b h1`.
; NOTE(review): r1 is presumably available as a scratch register because @h1
; takes a single argument -- confirm against the Thumb1 frame lowering code.
26define hidden i32 @f1() {
27; CHECK-LABEL: f1:
28; CHECK:       @ %bb.0:
29; CHECK-NEXT:    push {r7, lr}
30; CHECK-NEXT:    bl g
31; CHECK-NEXT:    pop {r7}
32; CHECK-NEXT:    pop {r1}
33; CHECK-NEXT:    mov lr, r1
34; CHECK-NEXT:    b h1
35  %1 = tail call i32 @g()
36  %2 = tail call i32 @h1(i32 %1)
37  ret i32 %2
38}
39
40declare i32 @h2(i32, i32, i32, i32, i32)
; f2: takes five unnamed i32 parameters (%0-%4). It calls @g; if the result is
; non-zero it tail-calls @h2 with that result and parameters %1-%4, otherwise
; it returns -1. Parameter %0 is not used by any instruction in the body.
; In the expected output the fifth parameter is read from the incoming stack
; area (ldr r7, [sp, #24]) before the call to g and written back to the same
; offset (str r7, [sp, #24]) ahead of the branch `b h2`; the -1 return value is
; built with movs r0, #0 / mvns r0, r0.
; The basic blocks are unnamed: the entry block is %5, the call block is %8 and
; the return block is %10, as the phi operands below confirm.
41define hidden i32 @f2(i32, i32, i32, i32, i32) {
42; CHECK-LABEL: f2:
43; CHECK:       @ %bb.0:
44; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
45; CHECK-NEXT:    sub sp, #4
46; CHECK-NEXT:    mov r4, r3
47; CHECK-NEXT:    mov r5, r2
48; CHECK-NEXT:    mov r6, r1
49; CHECK-NEXT:    ldr r7, [sp, #24]
50; CHECK-NEXT:    bl g
51; CHECK-NEXT:    cbz r0, .LBB2_2
52; CHECK-NEXT:  @ %bb.1:
53; CHECK-NEXT:    str r7, [sp, #24]
54; CHECK-NEXT:    mov r1, r6
55; CHECK-NEXT:    mov r2, r5
56; CHECK-NEXT:    mov r3, r4
57; CHECK-NEXT:    add sp, #4
58; CHECK-NEXT:    ldr r4, [sp, #16]
59; CHECK-NEXT:    mov lr, r4
60; CHECK-NEXT:    pop {r4, r5, r6, r7}
61; CHECK-NEXT:    add sp, #4
62; CHECK-NEXT:    b h2
63; CHECK-NEXT:  .LBB2_2:
64; CHECK-NEXT:    movs r0, #0
65; CHECK-NEXT:    mvns r0, r0
66; CHECK-NEXT:    add sp, #4
67; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
; Implicit entry block %5: branch to %10 when g() returned zero, else to %8.
68  %6 = tail call i32 @g()
69  %7 = icmp eq i32 %6, 0
70  br i1 %7, label %10, label %8
71
; Implicit block %8: g() returned non-zero, forward everything to @h2.
72  %9 = tail call i32 @h2(i32 %6, i32 %1, i32 %2, i32 %3, i32 %4)
73  br label %10
74
; Implicit block %10: merge h2's result with the -1 coming from the entry block.
75  %11 = phi i32 [ %9, %8 ], [ -1, %5 ]
76  ret i32 %11
77}
78
79; Make sure that tail calls to function pointers that require r0-r3 for argument
80; passing do not break the compiler.
81@fnptr = global ptr null
; test3: loads a function pointer from @fnptr and calls it in tail position
; with four i32 arguments, so r0-r3 are all written before the call. The
; expected output keeps the target in r4 and emits an ordinary call
; (blx r4 followed by pop {r4, pc}) rather than a tail branch.
82define i32 @test3() {
83; CHECK-LABEL: test3:
84; CHECK:       @ %bb.0:
85; CHECK-NEXT:    push {r4, lr}
86; CHECK-NEXT:    movw r0, :lower16:fnptr
87; CHECK-NEXT:    movt r0, :upper16:fnptr
88; CHECK-NEXT:    ldr r4, [r0]
89; CHECK-NEXT:    movs r0, #1
90; CHECK-NEXT:    movs r1, #2
91; CHECK-NEXT:    movs r2, #3
92; CHECK-NEXT:    movs r3, #4
93; CHECK-NEXT:    blx r4
94; CHECK-NEXT:    pop {r4, pc}
95  %1 = load ptr, ptr @fnptr
96  %2 = tail call i32 %1(i32 1, i32 2, i32 3, i32 4)
97  ret i32 %2
98}
99
100@fnptr2 = global ptr null
; test4: same shape as an indirect call through @fnptr2, but the last argument
; is an i64. In the expected output the i64 value 3 occupies the r2/r3 pair
; (movs r2, #3 / movs r3, #0), so r0-r3 are again all written and the call is
; emitted as blx r4 followed by pop {r4, pc}.
101define i32 @test4() {
102; CHECK-LABEL: test4:
103; CHECK:       @ %bb.0:
104; CHECK-NEXT:    push {r4, lr}
105; CHECK-NEXT:    movw r0, :lower16:fnptr2
106; CHECK-NEXT:    movt r0, :upper16:fnptr2
107; CHECK-NEXT:    ldr r4, [r0]
108; CHECK-NEXT:    movs r0, #1
109; CHECK-NEXT:    movs r1, #2
110; CHECK-NEXT:    movs r2, #3
111; CHECK-NEXT:    movs r3, #0
112; CHECK-NEXT:    blx r4
113; CHECK-NEXT:    pop {r4, pc}
114  %1 = load ptr, ptr @fnptr2
115  %2 = tail call i32 %1(i32 1, i32 2, i64 3)
116  ret i32 %2
117}
118
119; Check that tail calls to function pointers where not all of r0-r3 are used for
120; parameter passing are tail-call optimized.
121; test5: params in r0, r1. r2 & r3 are free.
122@fnptr3 = global ptr null
; test5: indirect call through @fnptr3 with two i32 arguments. The expected
; output has no push/pop at all: the target is loaded into r2, the arguments
; go in r0 and r1, and control transfers with `bx r2`.
123define i32 @test5() {
124; CHECK-LABEL: test5:
125; CHECK:       @ %bb.0:
126; CHECK-NEXT:    movw r0, :lower16:fnptr3
127; CHECK-NEXT:    movt r0, :upper16:fnptr3
128; CHECK-NEXT:    ldr r2, [r0]
129; CHECK-NEXT:    movs r0, #1
130; CHECK-NEXT:    movs r1, #2
131; CHECK-NEXT:    bx r2
132  %1 = load ptr, ptr @fnptr3
133  %2 = tail call i32 %1(i32 1, i32 2)
134  ret i32 %2
135}
136
137; test6: params in r0 and r2-r3. r1 is free.
138@fnptr4 = global ptr null
; test6: indirect call through @fnptr4 with an i32 followed by an i64. In the
; expected output the i32 goes in r0 and the i64 value 2 in r2/r3
; (movs r2, #2 / movs r3, #0); r1 carries no argument, so it holds the call
; target and the function ends with `bx r1`.
139define i32 @test6() {
140; CHECK-LABEL: test6:
141; CHECK:       @ %bb.0:
142; CHECK-NEXT:    movw r0, :lower16:fnptr4
143; CHECK-NEXT:    movt r0, :upper16:fnptr4
144; CHECK-NEXT:    ldr r1, [r0]
145; CHECK-NEXT:    movs r0, #1
146; CHECK-NEXT:    movs r2, #2
147; CHECK-NEXT:    movs r3, #0
148; CHECK-NEXT:    bx r1
149  %1 = load ptr, ptr @fnptr4
150  %2 = tail call i32 %1(i32 1, i64 2)
151  ret i32 %2
152}
153
154; Check that tail calls to functions other than function pointers are
155; tail-call optimized.
; test7: direct call to @bar in tail position with four i32 arguments. Unlike
; the indirect four-argument case (test3), the expected output needs no
; register for the target: r0-r3 are loaded and the function ends in `b bar`
; with no push/pop.
156define i32 @test7() {
157; CHECK-LABEL: test7:
158; CHECK:       @ %bb.0:
159; CHECK-NEXT:    movs r0, #1
160; CHECK-NEXT:    movs r1, #2
161; CHECK-NEXT:    movs r2, #3
162; CHECK-NEXT:    movs r3, #4
163; CHECK-NEXT:    b bar
164  %tail = tail call i32 @bar(i32 1, i32 2, i32 3, i32 4)
165  ret i32 %tail
166}
167
; Callee of the direct tail call in test7.
168declare i32 @bar(i32, i32, i32, i32)
169
170; Regression test for failure to load indirect branch target (class tcGPR) from
171; a stack slot.
172%struct.S = type { i32 }
; NOTE(review): %struct.S is not referenced by any instruction visible in this
; file; presumably it is left over from before the pointers became opaque
; `ptr` -- confirm before removing.
173
; test8: %fn must stay live across three calls (test8_u, test8_h, test8_g)
; before it is finally called in tail position with three i32 arguments.
; In the expected output %fn is spilled to [sp] on entry, reloaded into r3
; (the one argument register the final call leaves unused) ahead of the
; epilogue, and the function ends with `bx r3` after lr has been restored
; through r4.
174define void @test8(ptr nocapture %fn, i32 %x) local_unnamed_addr {
175; CHECK-LABEL: test8:
176; CHECK:       @ %bb.0: @ %entry
177; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
178; CHECK-NEXT:    sub sp, #4
179; CHECK-NEXT:    mov r4, r1
180; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
181; CHECK-NEXT:    bl test8_u
182; CHECK-NEXT:    mov r5, r0
183; CHECK-NEXT:    ldr r6, [r0]
184; CHECK-NEXT:    movs r7, #0
185; CHECK-NEXT:    mov r0, r7
186; CHECK-NEXT:    bl test8_h
187; CHECK-NEXT:    mov r1, r0
188; CHECK-NEXT:    mov r0, r6
189; CHECK-NEXT:    mov r2, r7
190; CHECK-NEXT:    bl test8_g
191; CHECK-NEXT:    str r4, [r5]
192; CHECK-NEXT:    movs r0, #1
193; CHECK-NEXT:    movs r1, #2
194; CHECK-NEXT:    movs r2, #3
195; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
196; CHECK-NEXT:    add sp, #4
197; CHECK-NEXT:    ldr r4, [sp, #16]
198; CHECK-NEXT:    mov lr, r4
199; CHECK-NEXT:    pop {r4, r5, r6, r7}
200; CHECK-NEXT:    add sp, #4
201; CHECK-NEXT:    bx r3
202entry:
203  %call = tail call ptr @test8_u()
204  %0 = load i32, ptr %call, align 4
205  %call1 = tail call i32 @test8_h(i32 0)
206  %call2 = tail call i32 @test8_g(i32 %0, i32 %call1, i32 0)
207  store i32 %x, ptr %call, align 4
; The i32 result of the indirect call is discarded; the function returns void.
208  %call4 = tail call i32 %fn(i32 1, i32 2, i32 3)
209  ret void
210}
211
; External helpers called by test8; only their signatures matter here.
212declare ptr @test8_u(...)
213
214declare i32 @test8_g(i32, i32, i32)
215
216declare i32 @test8_h(i32)
217
218; Check that we don't introduce an unnecessary spill of lr.
219declare i32 @h9(i32, i32, i32, i32)
; test9: loads one i32 through each of %x, %y and %z, adds the two consecutive
; i32 values at %a, and tail-calls @h9 with the four results. The expected
; output needs r4 as a temporary for the add, and saves/restores only
; {r4, r7}: lr appears in neither the push nor the pop list, and the function
; ends with `b h9`.
220define i32 @test9(ptr %x, ptr %y, ptr %z, ptr %a) {
221; CHECK-LABEL: test9:
222; CHECK:       @ %bb.0:
223; CHECK-NEXT:    push {r4, r7}
224; CHECK-NEXT:    ldr r4, [r3]
225; CHECK-NEXT:    ldr r3, [r3, #4]
226; CHECK-NEXT:    adds r3, r4, r3
227; CHECK-NEXT:    ldr r1, [r1]
228; CHECK-NEXT:    ldr r0, [r0]
229; CHECK-NEXT:    ldr r2, [r2]
230; CHECK-NEXT:    pop {r4, r7}
231; CHECK-NEXT:    b h9
232  %zz = load i32, ptr %z
233  %xx = load i32, ptr %x
234  %yy = load i32, ptr %y
235  %aa1 = load i32, ptr %a
; %a2 addresses the second i32 element behind %a (byte offset 4).
236  %a2 = getelementptr i32, ptr %a, i32 1
237  %aa2 = load i32, ptr %a2
238  %aa = add i32 %aa1, %aa2
239  %r = tail call i32 @h9(i32 %xx, i32 %yy, i32 %zz, i32 %aa)
240  ret i32 %r
241}
242