xref: /llvm-project/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll (revision 670259466b238176ac302c8dedf806d2b2be7e0c)
1; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
2
3target datalayout = "e-m:e-i64:64-n32:64"
4target triple = "powerpc64le-unknown-linux-gnu"
5
6; void foo(long n, long m, long o, int A[n][m][o]) {
7;   for (long i = 0; i < n; i++)
8;     for (long j = 0; j < m; j++)
9;       for (long k = 0; k < o; k++)
10;         A[2*i+3][3*j-4][2*k+7] = 1;
11; }
12
13; CHECK: Loop 'for.i' has cost = 100000000
14; CHECK-NEXT: Loop 'for.j' has cost = 1000000
15; CHECK-NEXT: Loop 'for.k' has cost = 70000
16
; @foo: three-deep loop nest for.i (outer) -> for.j -> for.k (inner) containing
; a single i32 store of the constant 1. The stored-to element of %A has the
; linearized index
;   ((2*i + 3) * %m + 3*j - 4) * %o + 2*k + 7
; The loops are reached only when %n, %m and %o are all > 0; otherwise control
; goes straight to for.end. The CHECK lines refer to loops named
; for.i/for.j/for.k, which match the loop header labels below.
17define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
18entry:
19  %cmp32 = icmp sgt i64 %n, 0                     ; n > 0 (guards for.i)
20  %cmp230 = icmp sgt i64 %m, 0                    ; m > 0 (guards for.j)
21  %cmp528 = icmp sgt i64 %o, 0                    ; o > 0 (guards for.k)
22  br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end
23
24for.cond1.preheader.lr.ph:                        ; preds = %entry
25  br i1 %cmp230, label %for.i.preheader, label %for.end
26
27for.i.preheader:                                  ; preds = %for.cond1.preheader.lr.ph
28  br i1 %cmp528, label %for.i.preheader.split, label %for.end
29
30for.i.preheader.split:                            ; preds = %for.i.preheader
31  br label %for.i
32
33for.i:                                            ; preds = %for.inci, %for.i.preheader.split
34  %i = phi i64 [ %inci, %for.inci ], [ 0, %for.i.preheader.split ]
35  %mul8 = shl i64 %i, 1                           ; 2*i
36  %add9 = add nsw i64 %mul8, 3                    ; 2*i + 3
37  %0 = mul i64 %add9, %m                          ; (2*i + 3) * m
38  %sub = add i64 %0, -4                           ; (2*i + 3) * m - 4
39  br label %for.j
40
41for.j:                                            ; preds = %for.incj, %for.i
42  %j = phi i64 [ %incj, %for.incj ], [ 0, %for.i ]
43  %mul7 = mul nsw i64 %j, 3                       ; 3*j
44  %tmp = add i64 %sub, %mul7                      ; (2*i + 3) * m + 3*j - 4
45  %tmp27 = mul i64 %tmp, %o                       ; ((2*i + 3) * m + 3*j - 4) * o
46  br label %for.k
47
48for.k:                                            ; preds = %for.k, %for.j
49  %k = phi i64 [ 0, %for.j ], [ %inck, %for.k ]
50
51  %mul = mul nsw i64 %k, 2                        ; 2*k
52  %arrayidx.sum = add i64 %mul, 7                 ; 2*k + 7
53  %arrayidx10.sum = add i64 %arrayidx.sum, %tmp27 ; full linearized element index
54  %arrayidx11 = getelementptr inbounds i32, ptr %A, i64 %arrayidx10.sum
55  store i32 1, ptr %arrayidx11, align 4           ; the only memory access in the nest
56
57  %inck = add nsw i64 %k, 1
58  %exitcond.us = icmp eq i64 %inck, %o            ; leave for.k once k + 1 == o
59  br i1 %exitcond.us, label %for.incj, label %for.k
60
61for.incj:                                         ; preds = %for.k
62  %incj = add nsw i64 %j, 1
63  %exitcond54.us = icmp eq i64 %incj, %m          ; leave for.j once j + 1 == m
64  br i1 %exitcond54.us, label %for.inci, label %for.j
65
66for.inci:                                         ; preds = %for.incj
67  %inci = add nsw i64 %i, 1
68  %exitcond55.us = icmp eq i64 %inci, %n          ; leave for.i once i + 1 == n
69  br i1 %exitcond55.us, label %for.end.loopexit, label %for.i
70
71for.end.loopexit:                                 ; preds = %for.inci
72  br label %for.end
73
74for.end:                                          ; preds = %for.end.loopexit, %for.i.preheader, %for.cond1.preheader.lr.ph, %entry
75  ret void
76}
77
78; Loop i is expected to have the largest cost and to be placed
79; as the outermost loop. This test differs from foo() in that the
80; loop nest is written in the suboptimal order j-i-k.
81; Since D123400, the loop cache analysis output is ordered
82; i-j-k regardless of the suboptimal order of the original loop nest.
83;
84; void foo2(long n, long m, long o, int A[n][m][o]) {
85;   for (long j = 0; j < m; j++)
86;     for (long i = 0; i < n; i++)
87;       for (long k = 0; k < o; k++)
88;         A[2*i+3][2*j-4][2*k+7] = 1;
89; }
90
91; CHECK: Loop 'for.i' has cost = 100000000
92; CHECK-NEXT: Loop 'for.j' has cost = 1000000
93; CHECK-NEXT: Loop 'for.k' has cost = 70000
94
; @foo2: three-deep loop nest for.j (outer) -> for.i -> for.k (inner) containing
; a single i32 store of the constant 1. The stored-to element of %A has the
; linearized index
;   ((2*i + 3) * %m + 2*j - 4) * %o + 2*k + 7
; The loops are reached only when %n, %m and %o are all > 0; otherwise control
; goes straight to for.end. Note that block for.i appears textually before its
; parent loop header for.j; control flow, not textual order, defines the nest.
95define void @foo2(i64 %n, i64 %m, i64 %o, ptr %A) {
96entry:
97  %cmp32 = icmp sgt i64 %n, 0                     ; n > 0
98  %cmp230 = icmp sgt i64 %m, 0                    ; m > 0
99  %cmp528 = icmp sgt i64 %o, 0                    ; o > 0
100  br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end
101
102for.cond1.preheader.lr.ph:                        ; preds = %entry
103  br i1 %cmp230, label %for.j.preheader, label %for.end
104
105for.j.preheader:                                  ; preds = %for.cond1.preheader.lr.ph
106  br i1 %cmp528, label %for.j.preheader.split, label %for.end
107
108for.j.preheader.split:                            ; preds = %for.j.preheader
109  br label %for.j
110
111for.i:                                            ; preds = %for.inci, %for.j
112  %i = phi i64 [ %inci, %for.inci ], [ 0, %for.j ]
113  %mul8 = shl i64 %i, 1                          ; 2*i
114  %add9 = add nsw i64 %mul8, 3                   ; 2*i + 3
115  %0 = mul i64 %add9, %m                         ; (2*i + 3) * m
116  %sub = add i64 %0, -4                          ; (2*i + 3) * m - 4
117  %mul7 = mul nsw i64 %j, 2                      ; 2*j (%j comes from the enclosing for.j header)
118  %tmp = add i64 %sub, %mul7                     ; (2*i + 3) * m + 2*j - 4
119  %tmp27 = mul i64 %tmp, %o                      ; ((2*i + 3) * m + 2*j - 4) * o
120  br label %for.k
121
122for.j:                                            ; preds = %for.incj, %for.j.preheader.split
123  %j = phi i64 [ %incj, %for.incj ], [ 0, %for.j.preheader.split ]
124  br label %for.i
125
126for.k:                                            ; preds = %for.k, %for.i
127  %k = phi i64 [ 0, %for.i ], [ %inck, %for.k ]
128
129  %mul = mul nsw i64 %k, 2                       ; 2*k
130  %arrayidx.sum = add i64 %mul, 7                ; 2*k + 7
131  %arrayidx10.sum = add i64 %arrayidx.sum, %tmp27 ; full linearized element index
132  %arrayidx11 = getelementptr inbounds i32, ptr %A, i64 %arrayidx10.sum
133  store i32 1, ptr %arrayidx11, align 4          ; the only memory access in the nest
134
135  %inck = add nsw i64 %k, 1
136  %exitcond.us = icmp eq i64 %inck, %o           ; leave for.k once k + 1 == o
137  br i1 %exitcond.us, label %for.inci, label %for.k
138
139for.incj:                                         ; preds = %for.inci
140  %incj = add nsw i64 %j, 1
141  %exitcond54.us = icmp eq i64 %incj, %m         ; leave for.j once j + 1 == m
142  br i1 %exitcond54.us, label %for.end.loopexit, label %for.j
143
144for.inci:                                         ; preds = %for.k
145  %inci = add nsw i64 %i, 1
146  %exitcond55.us = icmp eq i64 %inci, %n         ; leave for.i once i + 1 == n
147  br i1 %exitcond55.us, label %for.incj, label %for.i
148
149for.end.loopexit:                                 ; preds = %for.incj
150  br label %for.end
151
152for.end:                                          ; preds = %for.end.loopexit, %for.j.preheader, %for.cond1.preheader.lr.ph, %entry
153  ret void
154}
155