xref: /llvm-project/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll (revision 670259466b238176ac302c8dedf806d2b2be7e0c)
1; RUN: opt < %s  -cache-line-size=32 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=SMALLER-CACHELINE %s
2; RUN: opt < %s  -cache-line-size=256 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=LARGER-CACHELINE %s
3
4;; This test is similar to test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll,
5;; with the difference that it tests scenarios where the cache line size option is
6;; specified with different values.
7
8; Check IndexedReference::computeRefCost can handle type differences between
9; Stride and TripCount
10
11; Round costs up to the nearest whole number, e.g. in 'for.cond5' the cost is calculated as 12.5
12; and it makes more sense to say 13 cache lines are used rather than 12 cache lines.
13
14; SMALLER-CACHELINE: Loop 'for.cond' has cost = 256
15; LARGER-CACHELINE: Loop 'for.cond' has cost = 32
; Named struct type wrapping a single pointer; no function visible in this
; file refers to it.
16%struct._Handleitem = type { ptr }
17
; handle_to_ptr: a single loop ('for.cond') that stores a null pointer into
; %blocks[i] for i = 1 .. 1023. The induction variable is i32, while the
; getelementptr index is its i64 zero-extension.
18define void @handle_to_ptr(ptr %blocks) {
19; Preheader:
20entry:
21  br label %for.cond
22
23; Loop:
24for.cond:                                         ; preds = %for.body, %entry
25  %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ]
26  %cmp = icmp ult i32 %i.0, 1024
27  br i1 %cmp, label %for.body, label %for.end
28
29for.body:                                         ; preds = %for.cond
; Widen the i32 counter to the i64 index used by the getelementptr below.
30  %idxprom = zext i32 %i.0 to i64
31  %arrayidx = getelementptr inbounds ptr, ptr %blocks, i64 %idxprom
32  store ptr null, ptr %arrayidx, align 8
33  %inc = add nuw nsw i32 %i.0, 1
34  br label %for.cond
35
36; Exit blocks
37for.end:                                          ; preds = %for.cond
38  ret void
39}
40
41; Check IndexedReference::computeRefCost can handle type differences between
42; Coeff and ElemSize.
43
44; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000
45; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000
46; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 130000
47; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000
48; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000
49; LARGER-CACHELINE: Loop 'for.cond5' has cost = 20000
; Three-dimensional i32 array written by @handle_to_ptr_2; declared external
; with alignment 1.
50@data = external dso_local global [2 x [4 x [18 x i32]]], align 1
51
; handle_to_ptr_2: triply nested loop ('for.cond' > 'for.cond1' > 'for.cond5')
; that stores 7 into @data[i][j][k]. Each loop is left when its i1 argument is
; true (%b2, %b1 and %b0 respectively); no exit compares against an induction
; variable. The counters are i16 and are zero-extended to i32 before being
; used as getelementptr indices.
52define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
53entry:
54  br label %for.cond
55
; Outermost loop header: counter %i.0, exit controlled by %b2.
56for.cond:
57  %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ]
58  %idxprom = zext i16 %i.0 to i32
59  br i1 %b2, label %for.end19, label %for.cond1
60
; Middle loop header: counter %j.0, exit (to the outer latch) controlled by %b1.
61for.cond1:
62  %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ]
63  br i1 %b1, label %for.inc17, label %for.cond5.preheader
64
65for.cond5.preheader:
66  %idxprom10 = zext i16 %j.0 to i32
67  br label %for.cond5
68
; Innermost loop header: counter %k.0, exit (to the middle latch) controlled by %b0.
69for.cond5:
70  %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ]
71  br i1 %b0, label %for.inc14, label %for.inc
72
; Innermost body and latch: the store to @data[i][j][k] happens here.
73for.inc:
74  %idxprom12 = zext i16 %k.0 to i32
75  %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12
76  store i32 7, ptr %arrayidx13, align 1
77  %inc = add nuw nsw i16 %k.0, 1
78  br label %for.cond5
79
; Latch of the middle loop.
80for.inc14:
81  %inc15 = add nuw nsw i16 %j.0, 1
82  br label %for.cond1
83
; Latch of the outermost loop.
84for.inc17:
85  %inc18 = add nuw nsw i16 %i.0, 1
86  br label %for.cond
87
88for.end19:
89  ret void
90}
91
92; Check IndexedReference::computeRefCost can handle negative stride
93
94; SMALLER-CACHELINE: Loop 'for.neg.cond' has cost = 256
95; LARGER-CACHELINE: Loop 'for.neg.cond' has cost = 32
; handle_to_ptr_neg_stride: a single loop ('for.neg.cond') that stores a null
; pointer into %blocks[i], with i starting at 1023 and decreasing by one per
; iteration while i > 0 (signed compare).
96define void @handle_to_ptr_neg_stride(ptr %blocks) {
97; Preheader:
98entry:
99  br label %for.neg.cond
100
101; Loop:
102for.neg.cond:                                         ; preds = %for.neg.body, %entry
103  %i.0 = phi i32 [ 1023, %entry ], [ %dec, %for.neg.body ]
104  %cmp = icmp sgt i32 %i.0, 0
105  br i1 %cmp, label %for.neg.body, label %for.neg.end
106
107for.neg.body:                                         ; preds = %for.neg.cond
; Widen the i32 counter to the i64 index used by the getelementptr below.
108  %idxprom = zext i32 %i.0 to i64
109  %arrayidx = getelementptr inbounds ptr, ptr %blocks, i64 %idxprom
110  store ptr null, ptr %arrayidx, align 8
; The counter moves downwards: the step is -1.
111  %dec = add nsw i32 %i.0, -1
112  br label %for.neg.cond
113
114; Exit blocks
115for.neg.end:                                          ; preds = %for.neg.cond
116  ret void
117}
118
119
120
121;   for (int i = 40960; i > 0; i--)
122;     B[i] = B[40960 - i];
123
124; FIXME: Currently negative access functions are treated the same as positive
125; access functions. When this is fixed this testcase should have a cost
126; approximately 2x higher.
127
128; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10241
129; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1281
; Test2: loop 'for.cond2' counts i down from 40960 while i > 0 (signed). Each
; iteration loads the double at %B[40960 - i] and stores it to %B[i].
130define void @Test2(ptr %B) {
131entry:
132  br label %for.cond2
133
134for.cond2:                                         ; preds = %for.body, %entry
135  %i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
136  %cmp = icmp sgt i32 %i.0, 0
137  br i1 %cmp, label %for.body, label %for.end
138
139for.body:                                         ; preds = %for.cond2
; Load index: 40960 - i, which grows as i shrinks.
140  %sub = sub nsw i32 40960, %i.0
141  %idxprom = sext i32 %sub to i64
142  %arrayidx = getelementptr inbounds double, ptr %B, i64 %idxprom
143  %0 = load double, ptr %arrayidx, align 8
; Store index: i itself, which shrinks every iteration.
144  %idxprom1 = sext i32 %i.0 to i64
145  %arrayidx2 = getelementptr inbounds double, ptr %B, i64 %idxprom1
146  store double %0, ptr %arrayidx2, align 8
147  %dec = add nsw i32 %i.0, -1
148  br label %for.cond2
149
150for.end:                                          ; preds = %for.cond2
151  ret void
152}
153
154
155
156;   for (i = 40960; i > 0; i--)
157;     C[i] = C[i];
158
159; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10241
160; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1281
; Test3: loop 'for.cond3' counts i down from 40960 while i > 0 (signed). Each
; iteration loads the pointer at %C[i] and stores it back to the same index.
160define void @Test3(ptr %C) {
161entry:
162  br label %for.cond3
163
164for.cond3:                                         ; preds = %for.body, %entry
165  %i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
166  %cmp = icmp sgt i32 %i.0, 0
167  br i1 %cmp, label %for.body, label %for.end
168
169for.body:                                         ; preds = %for.cond3
170  %idxprom = sext i32 %i.0 to i64
171  %arrayidx = getelementptr inbounds ptr, ptr %C, i64 %idxprom
172  %0 = load ptr, ptr %arrayidx, align 8
; The store address is computed separately from the load address, but from the same index %i.0.
173  %idxprom1 = sext i32 %i.0 to i64
174  %arrayidx2 = getelementptr inbounds ptr, ptr %C, i64 %idxprom1
175  store ptr %0, ptr %arrayidx2, align 8
176  %dec = add nsw i32 %i.0, -1
177  br label %for.cond3
178
179for.end:                                          ; preds = %for.cond3
180  ret void
181}
183
184
185
186;  for (i = 0; i < 40960; i++)
187;     D[i] = D[i];
188
189; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10241
190; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1281
; Test4: loop 'for.cond4' counts i up from 0 while i < 40960 (signed). Each
; iteration loads the pointer at %D[i] and stores it back to the same index.
190define void @Test4(ptr %D) {
191entry:
192  br label %for.cond4
193
194for.cond4:                                         ; preds = %for.body, %entry
195  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
196  %cmp = icmp slt i32 %i.0, 40960
197  br i1 %cmp, label %for.body, label %for.end
198
199for.body:                                         ; preds = %for.cond4
200  %idxprom = sext i32 %i.0 to i64
201  %arrayidx = getelementptr inbounds ptr, ptr %D, i64 %idxprom
202  %0 = load ptr, ptr %arrayidx, align 8
; The store address is computed separately from the load address, but from the same index %i.0.
203  %idxprom1 = sext i32 %i.0 to i64
204  %arrayidx2 = getelementptr inbounds ptr, ptr %D, i64 %idxprom1
205  store ptr %0, ptr %arrayidx2, align 8
206  %inc = add nsw i32 %i.0, 1
207  br label %for.cond4
208
209for.end:                                          ; preds = %for.cond4
210  ret void
211}
213