; xref: /llvm-project/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll (revision ef992b60798b6cd2c50b25351bfc392e319896b7)
1; RUN: opt < %s -passes=loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
2; RUN: opt < %s -passes=loop-unroll,loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
3; RUN: opt < %s -passes=loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
4;
; The second invocation runs loop unrolling twice to verify that the loop
; unrolling metadata is properly removed and that further unrolling is disabled
; after the pass has run once.  The third invocation disallows remainder loops.
7
; Target description for the whole module: little-endian x86-64 Linux with
; ELF name mangling ("m:e"), native integer widths of 8/16/32/64 bits and a
; 128-bit natural stack alignment ("S128").
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
10
11; loop4 contains a small loop which should be completely unrolled by
12; the default unrolling heuristics.  It serves as a control for the
13; unroll(disable) pragma test loop4_with_disable.
14;
15; CHECK-LABEL: @loop4(
16; CHECK-NOT: br i1
; Control case without any loop metadata: adds 1 to each of a[0..3].
; The exit compare is against the constant 4, so the trip count is a
; compile-time constant.
define void @loop4(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; entered from %entry, repeated via its own back edge
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %idx
  %old = load i32, ptr %elt.ptr, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, ptr %elt.ptr, align 4         ; a[idx] = a[idx] + 1
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 4                ; true once four elements were updated
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; single predecessor: %for.body
  ret void
}
34
35; #pragma clang loop unroll(disable)
36;
37; CHECK-LABEL: @loop4_with_disable(
38; CHECK: store i32
39; CHECK-NOT: store i32
40; CHECK: br i1
; Four-iteration increment loop over a[0..3] whose latch branch is tagged with
; loop identifier !1; the only property on !1 is llvm.loop.unroll.disable.
define void @loop4_with_disable(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; entered from %entry, repeated via its own back edge
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %idx
  %old = load i32, ptr %elt.ptr, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, ptr %elt.ptr, align 4         ; a[idx] = a[idx] + 1
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 4                ; constant trip count of 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; single predecessor: %for.body
  ret void
}
; !1 is the loop identifier (its first operand refers to itself);
; !2 is the unroll(disable) property it carries.
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}
60
; loop64 has a high enough trip count that it should *not* be unrolled by
; the default unrolling heuristic.  It serves as the control for the
; loop64_with_* pragma tests below.
64;
65; CHECK-LABEL: @loop64(
66; CHECK: store i32
67; CHECK-NOT: store i32
68; CHECK: br i1
; Control case without any loop metadata: adds 1 to each of a[0..63].
; The exit compare is against the constant 64, so the trip count is a
; compile-time constant.
define void @loop64(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; entered from %entry, repeated via its own back edge
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %idx
  %old = load i32, ptr %elt.ptr, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, ptr %elt.ptr, align 4         ; a[idx] = a[idx] + 1
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 64               ; true once 64 elements were updated
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; single predecessor: %for.body
  ret void
}
86
87; #pragma clang loop unroll(full)
88; Loop should be fully unrolled.
89;
90; CHECK-LABEL: @loop64_with_full(
91; CHECK-NOT: br i1
; 64-iteration increment loop over a[0..63] whose latch branch is tagged with
; loop identifier !3; the only property on !3 is llvm.loop.unroll.full.
define void @loop64_with_full(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; entered from %entry, repeated via its own back edge
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %idx
  %old = load i32, ptr %elt.ptr, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, ptr %elt.ptr, align 4         ; a[idx] = a[idx] + 1
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 64               ; constant trip count of 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; single predecessor: %for.body
  ret void
}
; !3 is the loop identifier (its first operand refers to itself);
; !4 is the unroll(full) property.  !4 is also referenced by !8 further down.
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}
111
112; #pragma clang loop unroll_count(4)
113; Loop should be unrolled 4 times.
114;
115; CHECK-LABEL: @loop64_with_count4(
116; CHECK: store i32
117; CHECK: store i32
118; CHECK: store i32
119; CHECK: store i32
120; CHECK-NOT: store i32
121; CHECK: br i1
; 64-iteration increment loop over a[0..63] whose latch branch is tagged with
; loop identifier !5; the only property on !5 is llvm.loop.unroll.count = 4.
define void @loop64_with_count4(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; entered from %entry, repeated via its own back edge
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %idx
  %old = load i32, ptr %elt.ptr, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, ptr %elt.ptr, align 4         ; a[idx] = a[idx] + 1
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 64               ; constant trip count of 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; single predecessor: %for.body
  ret void
}
; !5 is the loop identifier (its first operand refers to itself);
; !6 is the unroll_count(4) property.  !6 is also referenced by !9 further down.
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}
141
142; #pragma clang loop unroll(full)
; Full unrolling is requested, but the loop has a runtime trip count, so
; no unrolling should occur.
145;
146; CHECK-LABEL: @runtime_loop_with_full(
147; CHECK: store i32
148; CHECK-NOT: store i32
; Adds 1 to each of a[0..b-1] when %b > 0.  The trip count depends on the
; argument %b, so it is only known at run time.  The latch branch in for.body
; is tagged with loop identifier !8, which carries the unroll(full) property !4.
define void @runtime_loop_with_full(ptr nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  ; Guard that skips the loop when %b <= 0.  It is not a latch of the loop, so
  ; the loop metadata belongs on the back-edge branch in for.body, not here.
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The 64-bit induction variable is narrowed to i32 for the compare with %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop identifier for the loop above; !4 (llvm.loop.unroll.full) is the node
; defined next to @loop64_with_full.
!8 = !{!8, !4}
169
170; #pragma clang loop unroll_count(4)
171; Loop has a runtime trip count.  Runtime unrolling should occur and loop
172; should be duplicated (original and 4x unrolled) if remainder is allowed,
173; otherwise loop should not be unrolled.
174;
175; CHECK-LABEL: @runtime_loop_with_count4(
176; CHECK: for.body
177; CHECK: store
178; REM: store
179; REM: store
180; REM: store
181; CHECK-NOT: store
182; CHECK: br i1
183; REM: for.body.epil:
184; REM: store
185; NOREM-NOT: for.body.epil:
186; NOREM-NOT: store
187; CHECK-NOT: store
188; REM: br i1
189; NOREM-NOT: br i1
; Adds 1 to each of a[0..b-1] when %b > 0.  The trip count depends on the
; argument %b, so it is only known at run time.  The latch branch in for.body
; is tagged with loop identifier !9, which carries the unroll_count(4)
; property !6.
define void @runtime_loop_with_count4(ptr nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  ; Guard that skips the loop when %b <= 0.  It is not a latch of the loop, so
  ; the loop metadata belongs on the back-edge branch in for.body, not here.
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The 64-bit induction variable is narrowed to i32 for the compare with %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop identifier for the loop above; !6 (llvm.loop.unroll.count, i32 4) is the
; node defined next to @loop64_with_count4.
!9 = !{!9, !6}
210
211; #pragma clang loop unroll_count(1)
212; Loop should not be unrolled
213;
214; CHECK-LABEL: @unroll_1(
215; CHECK: store i32
216; CHECK-NOT: store i32
217; CHECK: br i1
; Four-iteration increment loop over a[0..3] whose latch branch is tagged with
; loop identifier !10; the only property on !10 is llvm.loop.unroll.count = 1.
; The %b argument is not used by the body.
define void @unroll_1(ptr nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; entered from %entry, repeated via its own back edge
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %idx
  %old = load i32, ptr %elt.ptr, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, ptr %elt.ptr, align 4         ; a[idx] = a[idx] + 1
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 4                ; constant trip count of 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; single predecessor: %for.body
  ret void
}
; !10 is the loop identifier (its first operand refers to itself);
; !11 is the unroll_count(1) property it carries.
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}
237
238; #pragma clang loop unroll(enable)
239; Loop should be fully unrolled.
240;
241; CHECK-LABEL: @loop64_with_enable(
242; CHECK-NOT: br i1
; 64-iteration increment loop over a[0..63] whose latch branch is tagged with
; loop identifier !12; the only property on !12 is llvm.loop.unroll.enable.
define void @loop64_with_enable(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; entered from %entry, repeated via its own back edge
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %idx
  %old = load i32, ptr %elt.ptr, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, ptr %elt.ptr, align 4         ; a[idx] = a[idx] + 1
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 64               ; constant trip count of 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; single predecessor: %for.body
  ret void
}
; !12 is the loop identifier (its first operand refers to itself);
; !13 is the unroll(enable) property.  !13 is also referenced by !14 further down.
!12 = !{!12, !13}
!13 = !{!"llvm.loop.unroll.enable"}
262
263; #pragma clang loop unroll(enable)
264; Loop has a runtime trip count and should be runtime unrolled and duplicated
265; (original and 8x) if remainder is allowed, otherwise it should not be
266; unrolled.
267;
268; CHECK-LABEL: @runtime_loop_with_enable(
269; CHECK: for.body:
270; CHECK: store i32
271; REM: store i32
272; REM: store i32
273; REM: store i32
274; REM: store i32
275; REM: store i32
276; REM: store i32
277; REM: store i32
278; CHECK-NOT: store i32
279; CHECK: br i1
280; REM: for.body.epil:
281; NOREM-NOT: for.body.epil:
282; REM: store
283; CHECK-NOT: store
284; REM: br i1
285; NOREM-NOT: br i1
; Adds 1 to each of a[0..b-1] when %b > 0.  The trip count depends on the
; argument %b, so it is only known at run time.  The latch branch in for.body
; is tagged with loop identifier !14, which carries the unroll(enable)
; property !13.
define void @runtime_loop_with_enable(ptr nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  ; Guard that skips the loop when %b <= 0.  It is not a latch of the loop, so
  ; it carries no loop metadata; only the back-edge branch in for.body does.
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The 64-bit induction variable is narrowed to i32 for the compare with %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !14

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Loop identifier for the loop above; !13 (llvm.loop.unroll.enable) is the node
; defined next to @loop64_with_enable.
!14 = !{!14, !13}
306
307; #pragma clang loop unroll_count(3)
308; Loop has a runtime trip count.  Runtime unrolling should occur and loop
309; should be duplicated (original and 3x unrolled) if remainder is allowed,
310; otherwise it should not be unrolled.
311;
312; CHECK-LABEL: @runtime_loop_with_count3(
313; CHECK: for.body
314; CHECK: store
315; REM: store
316; REM: store
317; CHECK-NOT: store
318; CHECK: br i1
319; REM: for.body.epil:
320; REM: store
321; NOREM-NOT: for.body.epil:
322; NOREM-NOT: store
323; CHECK-NOT: store
324; REM: br i1
; Adds 1 to each of a[0..b-1] when %b > 0.  The trip count depends on the
; argument %b, so it is only known at run time.  The latch branch in for.body
; is tagged with loop identifier !15, which carries the unroll_count(3)
; property !16.
define void @runtime_loop_with_count3(ptr nocapture %a, i32 %b) {
entry:
  %cmp3 = icmp sgt i32 %b, 0
  ; Guard that skips the loop when %b <= 0.  It is not a latch of the loop, so
  ; it carries no loop metadata; only the back-edge branch in for.body does.
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The 64-bit induction variable is narrowed to i32 for the compare with %b.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; !15 is the loop identifier (its first operand refers to itself);
; !16 is the unroll_count(3) property it carries.
!15 = !{!15, !16}
!16 = !{!"llvm.loop.unroll.count", i32 3}
346