xref: /llvm-project/llvm/test/CodeGen/RISCV/tail-calls.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
3; RUN: llc -mtriple riscv32-unknown-linux-gnu -mattr=experimental-zicfilp \
4; RUN:   -code-model=large -o - %s \
5; RUN:   | FileCheck %s -check-prefix=CHECK-LARGE-ZICFILP
6; RUN: llc -mtriple riscv32-unknown-elf       -o - %s | FileCheck %s
7
8; Perform tail call optimization for global address.
; In the default-code-model runs the call is expected to become a single
; `tail callee_tail`. In the large-code-model + Zicfilp run the callee address
; is expected to be loaded from .LCPI0_0 into t2 and reached via `jr t2`.
9declare i32 @callee_tail(i32 %i)
10define i32 @caller_tail(i32 %i) nounwind {
11; CHECK-LABEL: caller_tail:
12; CHECK:       # %bb.0: # %entry
13; CHECK-NEXT:    tail callee_tail
14;
15; CHECK-LARGE-ZICFILP-LABEL: caller_tail:
16; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
17; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
18; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi0:
19; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI0_0)
20; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi0)(a1)
21; CHECK-LARGE-ZICFILP-NEXT:    jr t2
22entry:
23  %r = tail call i32 @callee_tail(i32 %i)
24  ret i32 %r
25}
26
27; Perform tail call optimization for external symbol.
; The llvm.memcpy intrinsic call in this optsize function is expected to be
; emitted as a tail call to the external symbol memcpy, after the address of
; @dest has been placed in a0, the incoming %src pointer in a1 and the length 7
; in a2.
28@dest = global [2 x i8] zeroinitializer
29declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
30define void @caller_extern(ptr %src) optsize {
31; CHECK-LABEL: caller_extern:
32; CHECK:       # %bb.0: # %entry
33; CHECK-NEXT:    lui a1, %hi(dest)
34; CHECK-NEXT:    addi a1, a1, %lo(dest)
35; CHECK-NEXT:    li a2, 7
36; CHECK-NEXT:    mv a3, a0
37; CHECK-NEXT:    mv a0, a1
38; CHECK-NEXT:    mv a1, a3
39; CHECK-NEXT:    tail memcpy
40;
41; CHECK-LARGE-ZICFILP-LABEL: caller_extern:
42; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
43; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
44; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi1:
45; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI1_0)
46; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi2:
47; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI1_1)
48; CHECK-LARGE-ZICFILP-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi1)(a1)
49; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi2)(a2)
50; CHECK-LARGE-ZICFILP-NEXT:    li a2, 7
51; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
52; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
53; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a3
54; CHECK-LARGE-ZICFILP-NEXT:    jr t2
55entry:
56  tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false)
57  ret void
58}
59
60; Perform tail call optimization for external symbol.
; Same shape as caller_extern, but instead of optsize the caller carries
; !prof !14 (function_entry_count 0, defined with the profile summary metadata
; at the end of the file). The expected code is the same apart from the global
; and label names.
; NOTE(review): the _pgso suffix presumably stands for profile-guided size
; optimization -- confirm.
61@dest_pgso = global [2 x i8] zeroinitializer
62define void @caller_extern_pgso(ptr %src) !prof !14 {
63; CHECK-LABEL: caller_extern_pgso:
64; CHECK:       # %bb.0: # %entry
65; CHECK-NEXT:    lui a1, %hi(dest_pgso)
66; CHECK-NEXT:    addi a1, a1, %lo(dest_pgso)
67; CHECK-NEXT:    li a2, 7
68; CHECK-NEXT:    mv a3, a0
69; CHECK-NEXT:    mv a0, a1
70; CHECK-NEXT:    mv a1, a3
71; CHECK-NEXT:    tail memcpy
72;
73; CHECK-LARGE-ZICFILP-LABEL: caller_extern_pgso:
74; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
75; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
76; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi3:
77; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI2_0)
78; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi4:
79; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI2_1)
80; CHECK-LARGE-ZICFILP-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi3)(a1)
81; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi4)(a2)
82; CHECK-LARGE-ZICFILP-NEXT:    li a2, 7
83; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
84; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
85; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a3
86; CHECK-LARGE-ZICFILP-NEXT:    jr t2
87entry:
88  tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false)
89  ret void
90}
91
92; Perform indirect tail call optimization (for function pointer call).
; The callee is chosen by a select on %a == 0 (callee_indirect1 when %a is
; zero, callee_indirect2 otherwise). The expected code branches on a0 and ends
; each path with `jr t1` on the selected function's address.
93declare void @callee_indirect1()
94declare void @callee_indirect2()
95define void @caller_indirect_tail(i32 %a) nounwind {
96; CHECK-LABEL: caller_indirect_tail:
97; CHECK:       # %bb.0: # %entry
98; CHECK-NEXT:    beqz a0, .LBB3_2
99; CHECK-NEXT:  # %bb.1: # %entry
100; CHECK-NEXT:    lui t1, %hi(callee_indirect2)
101; CHECK-NEXT:    addi t1, t1, %lo(callee_indirect2)
102; CHECK-NEXT:    jr t1
103; CHECK-NEXT:  .LBB3_2:
104; CHECK-NEXT:    lui t1, %hi(callee_indirect1)
105; CHECK-NEXT:    addi t1, t1, %lo(callee_indirect1)
106; CHECK-NEXT:    jr t1
107;
108; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_tail:
109; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
110; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
111; CHECK-LARGE-ZICFILP-NEXT:    beqz a0, .LBB3_2
112; CHECK-LARGE-ZICFILP-NEXT:  # %bb.1: # %entry
113; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi6:
114; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI3_0)
115; CHECK-LARGE-ZICFILP-NEXT:    lw t1, %pcrel_lo(.Lpcrel_hi6)(a0)
116; CHECK-LARGE-ZICFILP-NEXT:    jr t1
117; CHECK-LARGE-ZICFILP-NEXT:  .LBB3_2:
118; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi5:
119; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI3_1)
120; CHECK-LARGE-ZICFILP-NEXT:    lw t1, %pcrel_lo(.Lpcrel_hi5)(a0)
121; CHECK-LARGE-ZICFILP-NEXT:    jr t1
122entry:
123  %tobool = icmp eq i32 %a, 0
124  %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
125  tail call void %callee()
126  ret void
127}
128
129; Make sure we don't use t0 as the source for jr as that is a hint to pop the
130; return address stack on some microarchitectures.
; The function pointer arrives as the first argument (a0) and is expected to be
; copied to t1, while the seven i32 arguments move down one register each
; (a1..a7 -> a0..a6) before the final `jr t1`.
131define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
132; CHECK-LABEL: caller_indirect_no_t0:
133; CHECK:       # %bb.0:
134; CHECK-NEXT:    mv t1, a0
135; CHECK-NEXT:    mv a0, a1
136; CHECK-NEXT:    mv a1, a2
137; CHECK-NEXT:    mv a2, a3
138; CHECK-NEXT:    mv a3, a4
139; CHECK-NEXT:    mv a4, a5
140; CHECK-NEXT:    mv a5, a6
141; CHECK-NEXT:    mv a6, a7
142; CHECK-NEXT:    jr t1
143;
144; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_no_t0:
145; CHECK-LARGE-ZICFILP:       # %bb.0:
146; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
147; CHECK-LARGE-ZICFILP-NEXT:    mv t1, a0
148; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
149; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a2
150; CHECK-LARGE-ZICFILP-NEXT:    mv a2, a3
151; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a4
152; CHECK-LARGE-ZICFILP-NEXT:    mv a4, a5
153; CHECK-LARGE-ZICFILP-NEXT:    mv a5, a6
154; CHECK-LARGE-ZICFILP-NEXT:    mv a6, a7
155; CHECK-LARGE-ZICFILP-NEXT:    jr t1
156  %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
157  ret i32 %9
158}
159
160; Do not tail call optimize functions with varargs passed by stack.
; The call passes nine i32 values: the expected code fills a0-a7 and stores the
; ninth to 0(sp), then uses a regular call (`call callee_varargs`, or `jalr t2`
; in the large-code-model + Zicfilp run) followed by the caller's own epilogue
; and `ret`.
161declare i32 @callee_varargs(i32, ...)
162define void @caller_varargs(i32 %a, i32 %b) nounwind {
163; CHECK-LABEL: caller_varargs:
164; CHECK:       # %bb.0: # %entry
165; CHECK-NEXT:    addi sp, sp, -16
166; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
167; CHECK-NEXT:    sw a0, 0(sp)
168; CHECK-NEXT:    mv a2, a1
169; CHECK-NEXT:    mv a3, a0
170; CHECK-NEXT:    mv a4, a0
171; CHECK-NEXT:    mv a5, a1
172; CHECK-NEXT:    mv a6, a1
173; CHECK-NEXT:    mv a7, a0
174; CHECK-NEXT:    call callee_varargs
175; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
176; CHECK-NEXT:    addi sp, sp, 16
177; CHECK-NEXT:    ret
178;
179; CHECK-LARGE-ZICFILP-LABEL: caller_varargs:
180; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
181; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
182; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
183; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
184; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi7:
185; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI5_0)
186; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi7)(a2)
187; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 0(sp)
188; CHECK-LARGE-ZICFILP-NEXT:    mv a2, a1
189; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
190; CHECK-LARGE-ZICFILP-NEXT:    mv a4, a0
191; CHECK-LARGE-ZICFILP-NEXT:    mv a5, a1
192; CHECK-LARGE-ZICFILP-NEXT:    mv a6, a1
193; CHECK-LARGE-ZICFILP-NEXT:    mv a7, a0
194; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
195; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
196; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
197; CHECK-LARGE-ZICFILP-NEXT:    ret
198entry:
199  %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
200  ret void
201}
202
203; Do not tail call optimize if stack is used to pass parameters.
; Caller and callee both take fourteen i32 arguments. The expected code reloads
; six words from 32(sp)..52(sp), i.e. above the 32-byte frame it has just
; allocated, and stores them to 0(sp)..20(sp) before making a regular call.
204declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
205define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
206; CHECK-LABEL: caller_args:
207; CHECK:       # %bb.0: # %entry
208; CHECK-NEXT:    addi sp, sp, -32
209; CHECK-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
210; CHECK-NEXT:    lw t0, 32(sp)
211; CHECK-NEXT:    lw t1, 36(sp)
212; CHECK-NEXT:    lw t2, 40(sp)
213; CHECK-NEXT:    lw t3, 44(sp)
214; CHECK-NEXT:    lw t4, 48(sp)
215; CHECK-NEXT:    lw t5, 52(sp)
216; CHECK-NEXT:    sw t4, 16(sp)
217; CHECK-NEXT:    sw t5, 20(sp)
218; CHECK-NEXT:    sw t0, 0(sp)
219; CHECK-NEXT:    sw t1, 4(sp)
220; CHECK-NEXT:    sw t2, 8(sp)
221; CHECK-NEXT:    sw t3, 12(sp)
222; CHECK-NEXT:    call callee_args
223; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
224; CHECK-NEXT:    addi sp, sp, 32
225; CHECK-NEXT:    ret
226;
227; CHECK-LARGE-ZICFILP-LABEL: caller_args:
228; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
229; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
230; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
231; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
232; CHECK-LARGE-ZICFILP-NEXT:    lw t0, 32(sp)
233; CHECK-LARGE-ZICFILP-NEXT:    lw t1, 36(sp)
234; CHECK-LARGE-ZICFILP-NEXT:    lw t3, 40(sp)
235; CHECK-LARGE-ZICFILP-NEXT:    lw t4, 44(sp)
236; CHECK-LARGE-ZICFILP-NEXT:    lw t2, 48(sp)
237; CHECK-LARGE-ZICFILP-NEXT:    lw t5, 52(sp)
238; CHECK-LARGE-ZICFILP-NEXT:    sw t2, 16(sp)
239; CHECK-LARGE-ZICFILP-NEXT:    sw t5, 20(sp)
240; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi8:
241; CHECK-LARGE-ZICFILP-NEXT:    auipc t2, %pcrel_hi(.LCPI6_0)
242; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi8)(t2)
243; CHECK-LARGE-ZICFILP-NEXT:    sw t0, 0(sp)
244; CHECK-LARGE-ZICFILP-NEXT:    sw t1, 4(sp)
245; CHECK-LARGE-ZICFILP-NEXT:    sw t3, 8(sp)
246; CHECK-LARGE-ZICFILP-NEXT:    sw t4, 12(sp)
247; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
248; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
249; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
250; CHECK-LARGE-ZICFILP-NEXT:    ret
251entry:
252  %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
253  ret i32 %r
254}
255
256; Do not tail call optimize if parameters need to be passed indirectly.
; The fp128 constant is expected to be written to the caller's stack frame
; (four `sw` to 0(sp)..12(sp)) with its address passed in a0 (`mv a0, sp`),
; followed by a regular call.
257declare i32 @callee_indirect_args(fp128 %a)
258define void @caller_indirect_args() nounwind {
259; CHECK-LABEL: caller_indirect_args:
260; CHECK:       # %bb.0: # %entry
261; CHECK-NEXT:    addi sp, sp, -32
262; CHECK-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
263; CHECK-NEXT:    lui a1, 262128
264; CHECK-NEXT:    mv a0, sp
265; CHECK-NEXT:    sw zero, 0(sp)
266; CHECK-NEXT:    sw zero, 4(sp)
267; CHECK-NEXT:    sw zero, 8(sp)
268; CHECK-NEXT:    sw a1, 12(sp)
269; CHECK-NEXT:    call callee_indirect_args
270; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
271; CHECK-NEXT:    addi sp, sp, 32
272; CHECK-NEXT:    ret
273;
274; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args:
275; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
276; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
277; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
278; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
279; CHECK-LARGE-ZICFILP-NEXT:    lui a1, 262128
280; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi9:
281; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI7_0)
282; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi9)(a0)
283; CHECK-LARGE-ZICFILP-NEXT:    mv a0, sp
284; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 0(sp)
285; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 4(sp)
286; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 8(sp)
287; CHECK-LARGE-ZICFILP-NEXT:    sw a1, 12(sp)
288; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
289; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
290; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
291; CHECK-LARGE-ZICFILP-NEXT:    ret
292entry:
293  %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
294  ret void
295}
296
297; Perform tail call optimization for external weak symbol.
; The extern_weak callee is expected to be tail called: `tail callee_weak` in
; the default runs, or a load from .LCPI8_0 into t2 followed by `jr t2` in the
; large-code-model + Zicfilp run.
298declare extern_weak void @callee_weak()
299define void @caller_weak() nounwind {
300; CHECK-LABEL: caller_weak:
301; CHECK:       # %bb.0: # %entry
302; CHECK-NEXT:    tail callee_weak
303;
304; CHECK-LARGE-ZICFILP-LABEL: caller_weak:
305; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
306; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
307; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi10:
308; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI8_0)
309; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi10)(a0)
310; CHECK-LARGE-ZICFILP-NEXT:    jr t2
311entry:
312  tail call void @callee_weak()
313  ret void
314}
315
316; Exception-handling functions need a special set of instructions to indicate a
317; return to the hardware. Tail-calling another function would probably break
318; this.
; The caller is marked "interrupt"="machine". The expected code spills and
; reloads ra, t0-t6 and a0-a7 around a regular call and returns with `mret`.
; Unlike the other functions in this file, the large-code-model + Zicfilp
; expectations for caller_irq do not begin with `lpad 0`.
319declare void @callee_irq()
320define void @caller_irq() nounwind "interrupt"="machine" {
321; CHECK-LABEL: caller_irq:
322; CHECK:       # %bb.0: # %entry
323; CHECK-NEXT:    addi sp, sp, -64
324; CHECK-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
325; CHECK-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
326; CHECK-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
327; CHECK-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
328; CHECK-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
329; CHECK-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
330; CHECK-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
331; CHECK-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
332; CHECK-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
333; CHECK-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
334; CHECK-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
335; CHECK-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
336; CHECK-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
337; CHECK-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
338; CHECK-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
339; CHECK-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
340; CHECK-NEXT:    call callee_irq
341; CHECK-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
342; CHECK-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
343; CHECK-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
344; CHECK-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
345; CHECK-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
346; CHECK-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
347; CHECK-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
348; CHECK-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
349; CHECK-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
350; CHECK-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
351; CHECK-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
352; CHECK-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
353; CHECK-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
354; CHECK-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
355; CHECK-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
356; CHECK-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
357; CHECK-NEXT:    addi sp, sp, 64
358; CHECK-NEXT:    mret
359;
360; CHECK-LARGE-ZICFILP-LABEL: caller_irq:
361; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
362; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -64
363; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
364; CHECK-LARGE-ZICFILP-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
365; CHECK-LARGE-ZICFILP-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
366; CHECK-LARGE-ZICFILP-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
367; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
368; CHECK-LARGE-ZICFILP-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
369; CHECK-LARGE-ZICFILP-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
370; CHECK-LARGE-ZICFILP-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
371; CHECK-LARGE-ZICFILP-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
372; CHECK-LARGE-ZICFILP-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
373; CHECK-LARGE-ZICFILP-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
374; CHECK-LARGE-ZICFILP-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
375; CHECK-LARGE-ZICFILP-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
376; CHECK-LARGE-ZICFILP-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
377; CHECK-LARGE-ZICFILP-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
378; CHECK-LARGE-ZICFILP-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
379; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi11:
380; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI9_0)
381; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi11)(a0)
382; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
383; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
384; CHECK-LARGE-ZICFILP-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
385; CHECK-LARGE-ZICFILP-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
386; CHECK-LARGE-ZICFILP-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
387; CHECK-LARGE-ZICFILP-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
388; CHECK-LARGE-ZICFILP-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
389; CHECK-LARGE-ZICFILP-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
390; CHECK-LARGE-ZICFILP-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
391; CHECK-LARGE-ZICFILP-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
392; CHECK-LARGE-ZICFILP-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
393; CHECK-LARGE-ZICFILP-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
394; CHECK-LARGE-ZICFILP-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
395; CHECK-LARGE-ZICFILP-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
396; CHECK-LARGE-ZICFILP-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
397; CHECK-LARGE-ZICFILP-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
398; CHECK-LARGE-ZICFILP-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
399; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 64
400; CHECK-LARGE-ZICFILP-NEXT:    mret
401entry:
402  tail call void @callee_irq()
403  ret void
404}
405
406; Byval parameters hand the function a pointer directly into the stack area
407; we want to reuse during a tail call. Do not tail call optimize functions with
408; byval parameters.
; The byval argument here is an alloca'd ptr. The expected code copies the word
; at 8(sp) to 4(sp), passes `sp + 4` in a0 and makes a regular call.
409declare i32 @callee_byval(ptr byval(ptr) %a)
410define i32 @caller_byval() nounwind {
411; CHECK-LABEL: caller_byval:
412; CHECK:       # %bb.0: # %entry
413; CHECK-NEXT:    addi sp, sp, -16
414; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
415; CHECK-NEXT:    lw a0, 8(sp)
416; CHECK-NEXT:    sw a0, 4(sp)
417; CHECK-NEXT:    addi a0, sp, 4
418; CHECK-NEXT:    call callee_byval
419; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
420; CHECK-NEXT:    addi sp, sp, 16
421; CHECK-NEXT:    ret
422;
423; CHECK-LARGE-ZICFILP-LABEL: caller_byval:
424; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
425; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
426; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
427; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
428; CHECK-LARGE-ZICFILP-NEXT:    lw a0, 8(sp)
429; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 4(sp)
430; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi12:
431; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI10_0)
432; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi12)(a0)
433; CHECK-LARGE-ZICFILP-NEXT:    addi a0, sp, 4
434; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
435; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
436; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
437; CHECK-LARGE-ZICFILP-NEXT:    ret
438entry:
439  %a = alloca ptr
440  %r = tail call i32 @callee_byval(ptr byval(ptr) %a)
441  ret i32 %r
442}
443
444; Do not tail call optimize if callee uses structret semantics.
; callee_struct takes an sret(%struct.A) pointer. The caller passes the address
; of the global @a in a0 and a regular call is expected.
445%struct.A = type { i32 }
446@a = global %struct.A zeroinitializer
447
448declare void @callee_struct(ptr sret(%struct.A) %a)
449define void @caller_nostruct() nounwind {
450; CHECK-LABEL: caller_nostruct:
451; CHECK:       # %bb.0: # %entry
452; CHECK-NEXT:    addi sp, sp, -16
453; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
454; CHECK-NEXT:    lui a0, %hi(a)
455; CHECK-NEXT:    addi a0, a0, %lo(a)
456; CHECK-NEXT:    call callee_struct
457; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
458; CHECK-NEXT:    addi sp, sp, 16
459; CHECK-NEXT:    ret
460;
461; CHECK-LARGE-ZICFILP-LABEL: caller_nostruct:
462; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
463; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
464; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
465; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
466; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi13:
467; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI11_0)
468; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi14:
469; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI11_1)
470; CHECK-LARGE-ZICFILP-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi13)(a0)
471; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi14)(a1)
472; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
473; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
474; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
475; CHECK-LARGE-ZICFILP-NEXT:    ret
476entry:
477  tail call void @callee_struct(ptr sret(%struct.A) @a)
478  ret void
479}
480
481; Do not tail call optimize if caller uses structret semantics.
; Here the caller itself has an sret(%struct.A) parameter and the callee takes
; no arguments; a regular call followed by the caller's own `ret` is expected.
482declare void @callee_nostruct()
483define void @caller_struct(ptr sret(%struct.A) %a) nounwind {
484; CHECK-LABEL: caller_struct:
485; CHECK:       # %bb.0: # %entry
486; CHECK-NEXT:    addi sp, sp, -16
487; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
488; CHECK-NEXT:    call callee_nostruct
489; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
490; CHECK-NEXT:    addi sp, sp, 16
491; CHECK-NEXT:    ret
492;
493; CHECK-LARGE-ZICFILP-LABEL: caller_struct:
494; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
495; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
496; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
497; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
498; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi15:
499; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI12_0)
500; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi15)(a0)
501; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
502; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
503; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
504; CHECK-LARGE-ZICFILP-NEXT:    ret
505entry:
506  tail call void @callee_nostruct()
507  ret void
508}
509
510; Do not tail call optimize if disabled.
; Makes the same call as caller_tail (to @callee_tail, declared near the top of
; the file), but the caller carries "disable-tail-calls"="true", so a regular
; call followed by the caller's own epilogue and `ret` is expected.
511define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
512; CHECK-LABEL: disable_tail_calls:
513; CHECK:       # %bb.0: # %entry
514; CHECK-NEXT:    addi sp, sp, -16
515; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
516; CHECK-NEXT:    call callee_tail
517; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
518; CHECK-NEXT:    addi sp, sp, 16
519; CHECK-NEXT:    ret
520;
521; CHECK-LARGE-ZICFILP-LABEL: disable_tail_calls:
522; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
523; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
524; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
525; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
526; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi16:
527; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI13_0)
528; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi16)(a1)
529; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
530; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
531; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
532; CHECK-LARGE-ZICFILP-NEXT:    ret
533entry:
534  %rv = tail call i32 @callee_tail(i32 %i)
535  ret i32 %rv
536}
537
538; Duplicate returns to enable tail call optimizations.
; Four calls feed a phi in the shared %return block. The expected code has no
; join block: each of the four paths ends with its own tail call (to test,
; test1, test2 or test3).
539declare i32 @test()
540declare i32 @test1()
541declare i32 @test2()
542declare i32 @test3()
543define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
544; CHECK-LABEL: duplicate_returns:
545; CHECK:       # %bb.0: # %entry
546; CHECK-NEXT:    beqz a0, .LBB14_4
547; CHECK-NEXT:  # %bb.1: # %if.else
548; CHECK-NEXT:    beqz a1, .LBB14_5
549; CHECK-NEXT:  # %bb.2: # %if.else4
550; CHECK-NEXT:    bge a1, a0, .LBB14_6
551; CHECK-NEXT:  # %bb.3: # %if.then6
552; CHECK-NEXT:    tail test2
553; CHECK-NEXT:  .LBB14_4: # %if.then
554; CHECK-NEXT:    tail test
555; CHECK-NEXT:  .LBB14_5: # %if.then2
556; CHECK-NEXT:    tail test1
557; CHECK-NEXT:  .LBB14_6: # %if.else8
558; CHECK-NEXT:    tail test3
559;
560; CHECK-LARGE-ZICFILP-LABEL: duplicate_returns:
561; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
562; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
563; CHECK-LARGE-ZICFILP-NEXT:    beqz a0, .LBB14_4
564; CHECK-LARGE-ZICFILP-NEXT:  # %bb.1: # %if.else
565; CHECK-LARGE-ZICFILP-NEXT:    beqz a1, .LBB14_5
566; CHECK-LARGE-ZICFILP-NEXT:  # %bb.2: # %if.else4
567; CHECK-LARGE-ZICFILP-NEXT:    bge a1, a0, .LBB14_6
568; CHECK-LARGE-ZICFILP-NEXT:  # %bb.3: # %if.then6
569; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi19:
570; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_1)
571; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi19)(a0)
572; CHECK-LARGE-ZICFILP-NEXT:    jr t2
573; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_4: # %if.then
574; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi17:
575; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_3)
576; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi17)(a0)
577; CHECK-LARGE-ZICFILP-NEXT:    jr t2
578; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_5: # %if.then2
579; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi18:
580; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_2)
581; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi18)(a0)
582; CHECK-LARGE-ZICFILP-NEXT:    jr t2
583; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_6: # %if.else8
584; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi20:
585; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_0)
586; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi20)(a0)
587; CHECK-LARGE-ZICFILP-NEXT:    jr t2
588entry:
589  %cmp = icmp eq i32 %a, 0
590  br i1 %cmp, label %if.then, label %if.else
591
592if.then:                                          ; preds = %entry
593  %call = tail call i32 @test()
594  br label %return
595
596if.else:                                          ; preds = %entry
597  %cmp1 = icmp eq i32 %b, 0
598  br i1 %cmp1, label %if.then2, label %if.else4
599
600if.then2:                                         ; preds = %if.else
601  %call3 = tail call i32 @test1()
602  br label %return
603
604if.else4:                                         ; preds = %if.else
605  %cmp5 = icmp sgt i32 %a, %b
606  br i1 %cmp5, label %if.then6, label %if.else8
607
608if.then6:                                         ; preds = %if.else4
609  %call7 = tail call i32 @test2()
610  br label %return
611
612if.else8:                                         ; preds = %if.else4
613  %call9 = tail call i32 @test3()
614  br label %return
615
616return:                                           ; preds = %if.else8, %if.then6, %if.then2, %if.then
617  %retval = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %call9, %if.else8 ]
618  ret i32 %retval
619}
620
; Module-level "ProfileSummary" flag (!0) and its fields (!1-!13).
; !14 (function_entry_count 0) is the !prof attachment used by
; caller_extern_pgso.
621!llvm.module.flags = !{!0}
622!0 = !{i32 1, !"ProfileSummary", !1}
623!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
624!2 = !{!"ProfileFormat", !"InstrProf"}
625!3 = !{!"TotalCount", i64 10000}
626!4 = !{!"MaxCount", i64 10}
627!5 = !{!"MaxInternalCount", i64 1}
628!6 = !{!"MaxFunctionCount", i64 1000}
629!7 = !{!"NumCounts", i64 3}
630!8 = !{!"NumFunctions", i64 3}
631!9 = !{!"DetailedSummary", !10}
632!10 = !{!11, !12, !13}
633!11 = !{i32 10000, i64 100, i32 1}
634!12 = !{i32 999000, i64 100, i32 1}
635!13 = !{i32 999999, i64 1, i32 2}
636!14 = !{!"function_entry_count", i64 0}
637