xref: /llvm-project/llvm/test/Transforms/TailCallElim/reorder_load.ll (revision 8dc14a3bc05e50e86d7b26ac141364eccbceb134)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
3; PR4323
4
5target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6
7; Several cases where tail call elimination should move the load above the call,
8; then eliminate the tail recursion.
9
10
11
12@global = external global i32		; <ptr> [#uses=1]
13@extern_weak_global = extern_weak global i32		; <ptr> [#uses=1]
14
15
16; This load can be moved above the call because the function won't write to it
17; and the call has no side effects.
18define fastcc i32 @raise_load_1(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly willreturn {
19; CHECK-LABEL: @raise_load_1(
20; CHECK-NEXT:  entry:
21; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
22; CHECK:       tailrecurse:
23; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
24; CHECK-NEXT:    [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
25; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
26; CHECK-NEXT:    br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
27; CHECK:       if:
28; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
29; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
30; CHECK:       else:
31; CHECK-NEXT:    [[TMP7]] = add i32 [[START_ARG_TR]], 1
32; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ARG:%.*]], align 4
33; CHECK-NEXT:    [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
34; CHECK-NEXT:    br label [[TAILRECURSE]]
35;
36entry:
37  %tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
38  br i1 %tmp2, label %if, label %else
39
40if:		; preds = %entry
41  ret i32 0
42
43else:		; preds = %entry
44  %tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
45  %tmp8 = call fastcc i32 @raise_load_1(ptr %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
46  %tmp9 = load i32, ptr %a_arg		; <i32> [#uses=1]
47  %tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
48  ret i32 %tmp10
49}
50
51
52; This load can be moved above the call because the function won't write to it
53; and the load provably can't trap.
54define fastcc i32 @raise_load_2(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
55; CHECK-LABEL: @raise_load_2(
56; CHECK-NEXT:  entry:
57; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
58; CHECK:       tailrecurse:
59; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ]
60; CHECK-NEXT:    [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ]
61; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
62; CHECK-NEXT:    br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]]
63; CHECK:       if:
64; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
65; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
66; CHECK:       else:
67; CHECK-NEXT:    [[NULLCHECK:%.*]] = icmp eq ptr [[A_ARG:%.*]], null
68; CHECK-NEXT:    br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]]
69; CHECK:       unwind:
70; CHECK-NEXT:    unreachable
71; CHECK:       recurse:
72; CHECK-NEXT:    [[TMP7]] = add i32 [[START_ARG_TR]], 1
73; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr @global, align 4
74; CHECK-NEXT:    [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
75; CHECK-NEXT:    br label [[TAILRECURSE]]
76;
77entry:
78  %tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
79  br i1 %tmp2, label %if, label %else
80
81if:		; preds = %entry
82  ret i32 0
83
84else:		; preds = %entry
85  %nullcheck = icmp eq ptr %a_arg, null		; <i1> [#uses=1]
86  br i1 %nullcheck, label %unwind, label %recurse
87
88unwind:		; preds = %else
89  unreachable
90
91recurse:		; preds = %else
92  %tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
93  %tmp8 = call fastcc i32 @raise_load_2(ptr %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
94  %tmp9 = load i32, ptr @global		; <i32> [#uses=1]
95  %tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
96  ret i32 %tmp10
97}
98
99
100; This load can be safely moved above the call (even though it's from an
101; extern_weak global) because the call has no side effects.
102define fastcc i32 @raise_load_3(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly willreturn {
103; CHECK-LABEL: @raise_load_3(
104; CHECK-NEXT:  entry:
105; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
106; CHECK:       tailrecurse:
107; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
108; CHECK-NEXT:    [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
109; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
110; CHECK-NEXT:    br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
111; CHECK:       if:
112; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
113; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
114; CHECK:       else:
115; CHECK-NEXT:    [[TMP7]] = add i32 [[START_ARG_TR]], 1
116; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr @extern_weak_global, align 4
117; CHECK-NEXT:    [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
118; CHECK-NEXT:    br label [[TAILRECURSE]]
119;
120entry:
121  %tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
122  br i1 %tmp2, label %if, label %else
123
124if:		; preds = %entry
125  ret i32 0
126
127else:		; preds = %entry
128  %tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
129  %tmp8 = call fastcc i32 @raise_load_3(ptr %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
130  %tmp9 = load i32, ptr @extern_weak_global		; <i32> [#uses=1]
131  %tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
132  ret i32 %tmp10
133}
134
135
136; The second load can be safely moved above the call even though it's from an
137; unknown pointer (which normally means it might trap) because the first load
138; proves it doesn't trap.
139define fastcc i32 @raise_load_4(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
140; CHECK-LABEL: @raise_load_4(
141; CHECK-NEXT:  entry:
142; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
143; CHECK:       tailrecurse:
144; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ]
145; CHECK-NEXT:    [[A_LEN_ARG_TR:%.*]] = phi i32 [ [[A_LEN_ARG:%.*]], [[ENTRY]] ], [ [[FIRST:%.*]], [[RECURSE]] ]
146; CHECK-NEXT:    [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ]
147; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG_TR]]
148; CHECK-NEXT:    br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]]
149; CHECK:       if:
150; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
151; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
152; CHECK:       else:
153; CHECK-NEXT:    [[NULLCHECK:%.*]] = icmp eq ptr [[A_ARG:%.*]], null
154; CHECK-NEXT:    br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]]
155; CHECK:       unwind:
156; CHECK-NEXT:    unreachable
157; CHECK:       recurse:
158; CHECK-NEXT:    [[TMP7]] = add i32 [[START_ARG_TR]], 1
159; CHECK-NEXT:    [[FIRST]] = load i32, ptr [[A_ARG]], align 4
160; CHECK-NEXT:    [[SECOND:%.*]] = load i32, ptr [[A_ARG]], align 4
161; CHECK-NEXT:    [[TMP10]] = add i32 [[SECOND]], [[ACCUMULATOR_TR]]
162; CHECK-NEXT:    br label [[TAILRECURSE]]
163;
164entry:
165  %tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
166  br i1 %tmp2, label %if, label %else
167
168if:		; preds = %entry
169  ret i32 0
170
171else:		; preds = %entry
172  %nullcheck = icmp eq ptr %a_arg, null		; <i1> [#uses=1]
173  br i1 %nullcheck, label %unwind, label %recurse
174
175unwind:		; preds = %else
176  unreachable
177
178recurse:		; preds = %else
179  %tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
180  %first = load i32, ptr %a_arg		; <i32> [#uses=1]
181  %tmp8 = call fastcc i32 @raise_load_4(ptr %a_arg, i32 %first, i32 %tmp7)		; <i32> [#uses=1]
182  %second = load i32, ptr %a_arg		; <i32> [#uses=1]
183  %tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
184  ret i32 %tmp10
185}
186
187; This load can be moved above the call because the function won't write to it
188; and the a_arg is dereferenceable.
189define fastcc i32 @raise_load_5(ptr dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly nofree nosync {
190; CHECK-LABEL: @raise_load_5(
191; CHECK-NEXT:  entry:
192; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
193; CHECK:       tailrecurse:
194; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
195; CHECK-NEXT:    [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
196; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
197; CHECK-NEXT:    br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
198; CHECK:       if:
199; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
200; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
201; CHECK:       else:
202; CHECK-NEXT:    [[TMP7]] = add i32 [[START_ARG_TR]], 1
203; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ARG:%.*]], align 4
204; CHECK-NEXT:    [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
205; CHECK-NEXT:    br label [[TAILRECURSE]]
206;
207entry:
208  %tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
209  br i1 %tmp2, label %if, label %else
210
211if:		; preds = %entry
212  ret i32 0
213
214else:		; preds = %entry
215  %tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
216  %tmp8 = call fastcc i32 @raise_load_5(ptr %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
217  %tmp9 = load i32, ptr %a_arg		; <i32> [#uses=1]
218  %tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
219  ret i32 %tmp10
220}
221
222; This load can be moved above the call because the function call does not write to the memory the load
223; is accessing and the load is safe to speculate.
224define fastcc i32 @raise_load_6(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind  {
225; CHECK-LABEL: @raise_load_6(
226; CHECK-NEXT:  entry:
227; CHECK-NEXT:    [[S:%.*]] = alloca i32, align 4
228; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
229; CHECK:       tailrecurse:
230; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
231; CHECK-NEXT:    [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
232; CHECK-NEXT:    store i32 4, ptr [[S]], align 4
233; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
234; CHECK-NEXT:    br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
235; CHECK:       if:
236; CHECK-NEXT:    store i32 1, ptr [[A_ARG:%.*]], align 4
237; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
238; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
239; CHECK:       else:
240; CHECK-NEXT:    [[TMP7]] = add i32 [[START_ARG_TR]], 1
241; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[S]], align 4
242; CHECK-NEXT:    [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
243; CHECK-NEXT:    br label [[TAILRECURSE]]
244;
245entry:
246  %s = alloca i32
247  store i32 4, ptr %s
248  %tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
249  br i1 %tmp2, label %if, label %else
250
251if:		; preds = %entry
252  store i32 1, ptr %a_arg
253  ret i32 0
254
255else:		; preds = %entry
256  %tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
257  %tmp8 = call fastcc i32 @raise_load_6(ptr %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
258  %tmp9 = load i32, ptr %s		; <i32> [#uses=1]
259  %tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
260  ret i32 %tmp10
261}
262