; xref: /llvm-project/llvm/test/CodeGen/LoongArch/tail-calls.ll (revision 1897bf61f0bc85c8637997d0f2aa7d94d375d787)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 -mattr=+d < %s | FileCheck %s

;; Perform tail call optimization for global address.
declare i32 @callee_tail(i32 %i)
define i32 @caller_tail(i32 %i) nounwind {
; CHECK-LABEL: caller_tail:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    b %plt(callee_tail)
entry:
  %r = tail call i32 @callee_tail(i32 %i)
  ret i32 %r
}

;; Perform tail call optimization for external symbol.
;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns.
@dest = global [2 x i8] zeroinitializer
declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
define void @caller_extern(ptr %src) optsize {
; CHECK-LABEL: caller_extern:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pcalau12i $a1, %got_pc_hi20(dest)
; CHECK-NEXT:    ld.d $a1, $a1, %got_pc_lo12(dest)
; CHECK-NEXT:    ori $a2, $zero, 33
; CHECK-NEXT:    move $a3, $a0
; CHECK-NEXT:    move $a0, $a1
; CHECK-NEXT:    move $a1, $a3
; CHECK-NEXT:    b %plt(memcpy)
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 33, i1 false)
  ret void
}

;; Perform indirect tail call optimization (for function pointer call).
declare void @callee_indirect1()
declare void @callee_indirect2()
define void @caller_indirect_tail(i32 %a) nounwind {
; CHECK-LABEL: caller_indirect_tail:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pcalau12i $a1, %got_pc_hi20(callee_indirect2)
; CHECK-NEXT:    ld.d $a1, $a1, %got_pc_lo12(callee_indirect2)
; CHECK-NEXT:    pcalau12i $a2, %got_pc_hi20(callee_indirect1)
; CHECK-NEXT:    ld.d $a2, $a2, %got_pc_lo12(callee_indirect1)
; CHECK-NEXT:    addi.w $a0, $a0, 0
; CHECK-NEXT:    sltui $a0, $a0, 1
; CHECK-NEXT:    masknez $a1, $a1, $a0
; CHECK-NEXT:    maskeqz $a0, $a2, $a0
; CHECK-NEXT:    or $a0, $a0, $a1
; CHECK-NEXT:    jr $a0
entry:
  %tobool = icmp eq i32 %a, 0
  %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
  tail call void %callee()
  ret void
}

;; Do not tail call optimize functions with varargs passed by stack.
declare i32 @callee_varargs(i32, ...)
define void @caller_varargs(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: caller_varargs:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -16
; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $a0, $sp, 0
; CHECK-NEXT:    move $a2, $a1
; CHECK-NEXT:    move $a3, $a0
; CHECK-NEXT:    move $a4, $a0
; CHECK-NEXT:    move $a5, $a1
; CHECK-NEXT:    move $a6, $a1
; CHECK-NEXT:    move $a7, $a0
; CHECK-NEXT:    bl %plt(callee_varargs)
; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 16
; CHECK-NEXT:    ret
entry:
  %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
  ret void
}

;; Do not tail call optimize if stack is used to pass parameters.
declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i)
define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) nounwind {
; CHECK-LABEL: caller_args:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -16
; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT:    ld.d $t0, $sp, 16
; CHECK-NEXT:    st.d $t0, $sp, 0
; CHECK-NEXT:    bl %plt(callee_args)
; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 16
; CHECK-NEXT:    ret
entry:
  %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i)
  ret i32 %r
}

;; Do not tail call optimize if parameters need to be passed indirectly.
declare i32 @callee_indirect_args(i256 %a)
define void @caller_indirect_args() nounwind {
; CHECK-LABEL: caller_indirect_args:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -48
; CHECK-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $zero, $sp, 24
; CHECK-NEXT:    vrepli.b $vr0, 0
; CHECK-NEXT:    vst $vr0, $sp, 8
; CHECK-NEXT:    ori $a1, $zero, 1
; CHECK-NEXT:    addi.d $a0, $sp, 0
; CHECK-NEXT:    st.d $a1, $sp, 0
; CHECK-NEXT:    bl %plt(callee_indirect_args)
; CHECK-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 48
; CHECK-NEXT:    ret
entry:
  %call = tail call i32 @callee_indirect_args(i256 1)
  ret void
}

;; Do not tail call optimize if byval parameters need to be passed.
declare i32 @callee_byval(ptr byval(ptr) %a)
define i32 @caller_byval() nounwind {
; CHECK-LABEL: caller_byval:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -32
; CHECK-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; CHECK-NEXT:    ld.d $a0, $sp, 16
; CHECK-NEXT:    st.d $a0, $sp, 8
; CHECK-NEXT:    addi.d $a0, $sp, 8
; CHECK-NEXT:    bl %plt(callee_byval)
; CHECK-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 32
; CHECK-NEXT:    ret
entry:
  %a = alloca ptr
  %r = tail call i32 @callee_byval(ptr byval(ptr) %a)
  ret i32 %r
}

;; Do not tail call optimize if callee uses structret semantics.
%struct.A = type { i32 }
@a = global %struct.A zeroinitializer

declare void @callee_struct(ptr sret(%struct.A) %a)
define void @caller_nostruct() nounwind {
; CHECK-LABEL: caller_nostruct:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -16
; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT:    pcalau12i $a0, %got_pc_hi20(a)
; CHECK-NEXT:    ld.d $a0, $a0, %got_pc_lo12(a)
; CHECK-NEXT:    bl %plt(callee_struct)
; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 16
; CHECK-NEXT:    ret
entry:
  tail call void @callee_struct(ptr sret(%struct.A) @a)
  ret void
}

;; Do not tail call optimize if caller uses structret semantics.
declare void @callee_nostruct()
define void @caller_struct(ptr sret(%struct.A) %a) nounwind {
; CHECK-LABEL: caller_struct:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -16
; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT:    bl %plt(callee_nostruct)
; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 16
; CHECK-NEXT:    ret
entry:
  tail call void @callee_nostruct()
  ret void
}

;; Do not tail call optimize if disabled.
define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
; CHECK-LABEL: disable_tail_calls:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -16
; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT:    bl %plt(callee_tail)
; CHECK-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 16
; CHECK-NEXT:    ret
entry:
  %rv = tail call i32 @callee_tail(i32 %i)
  ret i32 %rv
}
