xref: /llvm-project/llvm/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll (revision 5ddce70ef0e5a641d7fea95e31fc5e2439cb98cb)
; Exercises loop-unroll for -mcpu=thunderx2t99 with partial unrolling allowed.
; The expectations below first match the pass's debug output and then the
; unrolled IR; relying on debug output is why an assertions-enabled build is
; required.
1; REQUIRES: asserts
2; RUN: opt -mcpu=thunderx2t99 -passes=loop-unroll --debug-only=loop-unroll --debug-only=basicblock-utils -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
3
4target triple = "aarch64-unknown-linux-gnu"
5
; Debug output: both loops of @foo are reported (sizes 18 and 19, trip count
; 512) and each of them is unrolled by a factor of 4.
6; CHECK: Loop Unroll: F[foo] Loop %loop.header
7; CHECK:   Loop Size = 18
8; CHECK:   Exiting block %loop.inc: TripCount=512, TripMultiple=0, BreakoutTrip=0
9; CHECK: UNROLLING loop %loop.header by 4
10; CHECK: Merging:
11; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
12; CHECK:   Loop Size = 19
13; CHECK:   Exiting block %loop.2.inc: TripCount=512, TripMultiple=0, BreakoutTrip=0
14; CHECK: UNROLLING loop %loop.2.header by 4
15; CHECK: Merging:
; Unrolled IR of the first loop: the back edge now comes from the fourth copy
; (%loop.inc.3) and the original adds are expected to carry nuw nsw.
16; CHECK: %counter = phi i32 [ 0, %entry ], [ %inc.3, %loop.inc.3 ]
17; CHECK: %val = add nuw nsw i32 %counter, 5
18; CHECK: %val1 = add nuw nsw i32 %counter, 6
19; CHECK: %val2 = add nuw nsw i32 %counter, 7
20; CHECK: %val3 = add nuw nsw i32 %counter, 8
21; CHECK: %val4 = add nuw nsw i32 %counter, 9
22; CHECK: %val5 = add nuw nsw i32 %counter, 10
; The flag-less spellings of those same adds must not appear before the second
; loop's header phi. Each pattern names the value that actually holds the sum
; (%val, %val1 .. %val5) so that it can match the instruction it guards.
23; CHECK-NOT: %val = add i32 %counter, 5
24; CHECK-NOT: %val1 = add i32 %counter, 6
25; CHECK-NOT: %val2 = add i32 %counter, 7
26; CHECK-NOT: %val3 = add i32 %counter, 8
27; CHECK-NOT: %val4 = add i32 %counter, 9
28; CHECK-NOT: %val5 = add i32 %counter, 10
29; CHECK: %counter.2 = phi i32 [ 0, %exit.0 ], [ %inc.2.3, %loop.2.inc.3 ]
30
; @foo - unroller input: two back-to-back counted loops over stack arrays.
;   %out : destination pointer; element 0 is written after the first loop and
;          element 1 after the second loop.
; Both loops start their counter at 0, advance it by 2 and leave as soon as the
; incremented value is >= 1023 (signed). On every iteration they store the
; counter and counter+5 .. counter+10 into separate arrays, so only even
; indices are ever written.
31define void @foo(ptr %out) {
32entry:
  ; Eight 1024-element i32 scratch arrays; %x06 is written by the second loop only.
33  %0 = alloca [1024 x i32]
34  %x0 = alloca [1024 x i32]
35  %x01 = alloca [1024 x i32]
36  %x02 = alloca [1024 x i32]
37  %x03 = alloca [1024 x i32]
38  %x04 = alloca [1024 x i32]
39  %x05 = alloca [1024 x i32]
40  %x06 = alloca [1024 x i32]
41  br label %loop.header
42
; ---- first loop: seven stores per iteration ----
43loop.header:
44  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
45  br label %loop.body
46
47loop.body:
  ; %0[counter] = counter
48  %ptr = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter
49  store i32 %counter, ptr %ptr
  ; %x0N[counter] = counter + (5 + N) for N = 0 .. 5
50  %val = add i32 %counter, 5
51  %xptr = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter
52  store i32 %val, ptr %xptr
53  %val1 = add i32 %counter, 6
54  %xptr1 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter
55  store i32 %val1, ptr %xptr1
56  %val2 = add i32 %counter, 7
57  %xptr2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter
58  store i32 %val2, ptr %xptr2
59  %val3 = add i32 %counter, 8
60  %xptr3 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter
61  store i32 %val3, ptr %xptr3
62  %val4 = add i32 %counter, 9
63  %xptr4 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter
64  store i32 %val4, ptr %xptr4
65  %val5 = add i32 %counter, 10
66  %xptr5 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter
67  store i32 %val5, ptr %xptr5
68  br label %loop.inc
69
70loop.inc:
  ; Step by 2; exit once the next counter value reaches 1023 or more.
71  %inc = add i32 %counter, 2
72  %1 = icmp sge i32 %inc, 1023
73  br i1 %1, label  %exit.0, label %loop.header
74
75exit.0:
  ; out[0] = %0[5].
  ; NOTE(review): index 5 is odd, and the loop above only stores to even
  ; indices, so this load reads an element the loop never wrote - confirm
  ; that this is intended.
76  %2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 5
77  %3 = load i32, ptr %2
78  store i32 %3, ptr %out
79  br label %loop.2.header
80
81
; ---- second loop: same shape as the first, plus one extra store ----
82loop.2.header:
83  %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc]
84  br label %loop.2.body
85
86loop.2.body:
  ; %0[counter.2] = counter.2
87  %ptr.2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter.2
88  store i32 %counter.2, ptr %ptr.2
89  %val.2 = add i32 %counter.2, 5
90  %xptr.2 = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter.2
91  store i32 %val.2, ptr %xptr.2
92  %val1.2 = add i32 %counter.2, 6
93  %xptr1.2 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter.2
  ; Store this loop's own sum, not the first loop's %val1.
94  store i32 %val1.2, ptr %xptr1.2
95  %val2.2 = add i32 %counter.2, 7
96  %xptr2.2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter.2
  ; Store this loop's own sum, not the first loop's %val2.
97  store i32 %val2.2, ptr %xptr2.2
98  %val3.2 = add i32 %counter.2, 8
99  %xptr3.2 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter.2
100  store i32 %val3.2, ptr %xptr3.2
101  %val4.2 = add i32 %counter.2, 9
102  %xptr4.2 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter.2
103  store i32 %val4.2, ptr %xptr4.2
104  %val5.2 = add i32 %counter.2, 10
105  %xptr5.2 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter.2
106  store i32 %val5.2, ptr %xptr5.2
  ; Extra store of %val5.2 into %x06: the one store this body has beyond the
  ; first loop's body (presumably what makes the expected size of this loop
  ; one larger - confirm against the expectations at the top of the file).
107  %xptr6.2 = getelementptr [1024 x i32], ptr %x06, i32 0, i32 %counter.2
108  store i32 %val5.2, ptr %xptr6.2
109  br label %loop.2.inc
110
111loop.2.inc:
  ; Same step and exit condition as the first loop.
112  %inc.2 = add i32 %counter.2, 2
113  %4 = icmp sge i32 %inc.2, 1023
114  br i1 %4, label  %exit.2, label %loop.2.header
115
116exit.2:
  ; out[1] = %0[6]; index 6 is even, so both loops stored the value 6 there.
117  %x2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 6
118  %x3 = load i32, ptr %x2
119  %out2 = getelementptr i32, ptr %out, i32 1
120  store i32 %x3, ptr %out2
121  ret void
122}
123