; RUN: opt < %s -cache-line-size=32 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=SMALLER-CACHELINE %s
; RUN: opt < %s -cache-line-size=256 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=LARGER-CACHELINE %s

;; This test is similar to test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll,
;; except that it exercises the -cache-line-size option with two different
;; values (32 and 256). Each loop therefore has one expected cost per prefix.

; Check IndexedReference::computeRefCost can handle type differences between
; Stride and TripCount

; Costs are rounded up to the nearest whole number, e.g. for 'for.cond5' (in
; @handle_to_ptr_2 below) the cost is calculated as 12.5, and it makes more
; sense to say 13 cache lines are used rather than 12 cache lines.

; SMALLER-CACHELINE: Loop 'for.cond' has cost = 256
; LARGER-CACHELINE: Loop 'for.cond' has cost = 32
%struct._Handleitem = type { ptr }

; Single loop: i32 induction variable running from 1 while (i <u 1024), zero
; extended to i64 to index an array of pointers; each iteration stores null to
; blocks[i].
define void @handle_to_ptr(ptr %blocks) {
; Preheader:
entry:
  br label %for.cond

; Loop:
for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ]
  %cmp = icmp ult i32 %i.0, 1024
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %idxprom = zext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %blocks, i64 %idxprom
  store ptr null, ptr %arrayidx, align 8
  %inc = add nuw nsw i32 %i.0, 1
  br label %for.cond

; Exit blocks
for.end:                                          ; preds = %for.cond
  ret void
}

; Check IndexedReference::computeRefCost can handle type differences between
; Coeff and ElemSize.

; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000
; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000
; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 130000
; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000
; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000
; LARGER-CACHELINE: Loop 'for.cond5' has cost = 20000
@data = external dso_local global [2 x [4 x [18 x i32]]], align 1

; Three-deep loop nest (for.cond > for.cond1 > for.cond5) with i16 induction
; variables that are zero extended to i32 and used as the three subscripts of
; a store into @data. The loop exits are controlled by the opaque i1 arguments
; %b2, %b1 and %b0 respectively, so no trip count is visible in the IR.
define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
entry:
  br label %for.cond

; Outermost loop header (induction variable %i.0).
for.cond:
  %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ]
  %idxprom = zext i16 %i.0 to i32
  br i1 %b2, label %for.end19, label %for.cond1

; Middle loop header (induction variable %j.0).
for.cond1:
  %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ]
  br i1 %b1, label %for.inc17, label %for.cond5.preheader

for.cond5.preheader:
  %idxprom10 = zext i16 %j.0 to i32
  br label %for.cond5

; Innermost loop header (induction variable %k.0).
for.cond5:
  %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ]
  br i1 %b0, label %for.inc14, label %for.inc

; Innermost loop body: data[i][j][k] = 7, then k++.
for.inc:
  %idxprom12 = zext i16 %k.0 to i32
  %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12
  store i32 7, ptr %arrayidx13, align 1
  %inc = add nuw nsw i16 %k.0, 1
  br label %for.cond5

; Middle loop latch: j++.
for.inc14:
  %inc15 = add nuw nsw i16 %j.0, 1
  br label %for.cond1

; Outermost loop latch: i++.
for.inc17:
  %inc18 = add nuw nsw i16 %i.0, 1
  br label %for.cond

for.end19:
  ret void
}

; Check IndexedReference::computeRefCost can handle negative stride

; SMALLER-CACHELINE: Loop 'for.neg.cond' has cost = 256
; LARGER-CACHELINE: Loop 'for.neg.cond' has cost = 32

; Same access as @handle_to_ptr, but the i32 induction variable starts at 1023
; and is decremented by 1 while (i >s 0).
define void @handle_to_ptr_neg_stride(ptr %blocks) {
; Preheader:
entry:
  br label %for.neg.cond

; Loop:
for.neg.cond:                                     ; preds = %for.neg.body, %entry
  %i.0 = phi i32 [ 1023, %entry ], [ %dec, %for.neg.body ]
  %cmp = icmp sgt i32 %i.0, 0
  br i1 %cmp, label %for.neg.body, label %for.neg.end

for.neg.body:                                     ; preds = %for.neg.cond
  %idxprom = zext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %blocks, i64 %idxprom
  store ptr null, ptr %arrayidx, align 8
  %dec = add nsw i32 %i.0, -1
  br label %for.neg.cond

; Exit blocks
for.neg.end:                                      ; preds = %for.neg.cond
  ret void
}



;   for (int i = 40960; i > 0; i--)
;     B[i] = B[40960 - i];

; FIXME: Currently negative access functions are treated the same as positive
; access functions. When this is fixed this testcase should have a cost
; approximately 2x higher.

; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10241
; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1281
define void @Test2(ptr %B) {
entry:
  br label %for.cond2

for.cond2:                                        ; preds = %for.body, %entry
  %i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
  %cmp = icmp sgt i32 %i.0, 0
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond2
  ; Load B[40960 - i] ...
  %sub = sub nsw i32 40960, %i.0
  %idxprom = sext i32 %sub to i64
  %arrayidx = getelementptr inbounds double, ptr %B, i64 %idxprom
  %0 = load double, ptr %arrayidx, align 8
  ; ... and store it to B[i].
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds double, ptr %B, i64 %idxprom1
  store double %0, ptr %arrayidx2, align 8
  %dec = add nsw i32 %i.0, -1
  br label %for.cond2

for.end:                                          ; preds = %for.cond2
  ret void
}



;   for (i = 40960; i > 0; i--)
;     C[i] = C[i];

; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10241
; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1281
define void @Test3(ptr %C) {
entry:
  br label %for.cond3

for.cond3:                                        ; preds = %for.body, %entry
  %i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
  %cmp = icmp sgt i32 %i.0, 0
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond3
  ; The load and the store address the same element, C[i].
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %C, i64 %idxprom
  %0 = load ptr, ptr %arrayidx, align 8
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds ptr, ptr %C, i64 %idxprom1
  store ptr %0, ptr %arrayidx2, align 8
  %dec = add nsw i32 %i.0, -1
  br label %for.cond3

for.end:                                          ; preds = %for.cond3
  ret void
}



;   for (i = 0; i < 40960; i++)
;     D[i] = D[i];

; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10241
; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1281
define void @Test4(ptr %D) {
entry:
  br label %for.cond4

for.cond4:                                        ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 40960
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond4
  ; The load and the store address the same element, D[i].
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %D, i64 %idxprom
  %0 = load ptr, ptr %arrayidx, align 8
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds ptr, ptr %D, i64 %idxprom1
  store ptr %0, ptr %arrayidx2, align 8
  %inc = add nsw i32 %i.0, 1
  br label %for.cond4

for.end:                                          ; preds = %for.cond4
  ret void
}