1 //===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass implements a simple loop unroller. It works best when loops have 11 // been canonicalized by the -indvars pass, allowing it to determine the trip 12 // counts of loops easily. 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Scalar.h" 16 #include "llvm/Analysis/CodeMetrics.h" 17 #include "llvm/Analysis/LoopPass.h" 18 #include "llvm/Analysis/ScalarEvolution.h" 19 #include "llvm/Analysis/TargetTransformInfo.h" 20 #include "llvm/IR/DataLayout.h" 21 #include "llvm/IR/Dominators.h" 22 #include "llvm/IR/IntrinsicInst.h" 23 #include "llvm/IR/Metadata.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include "llvm/Transforms/Utils/UnrollLoop.h" 28 #include <climits> 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "loop-unroll" 33 34 static cl::opt<unsigned> 35 UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, 36 cl::desc("The cut-off point for automatic loop unrolling")); 37 38 static cl::opt<unsigned> 39 UnrollCount("unroll-count", cl::init(0), cl::Hidden, 40 cl::desc("Use this unroll count for all loops including those with " 41 "unroll_count pragma values, for testing purposes")); 42 43 static cl::opt<bool> 44 UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, 45 cl::desc("Allows loops to be partially unrolled until " 46 "-unroll-threshold loop size is reached.")); 47 48 static cl::opt<bool> 49 UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, 50 cl::desc("Unroll loops with run-time trip counts")); 51 52 // Maximum allowed unroll count for a loop being fully unrolled 53 // because of a pragma unroll(enable) statement (ie, metadata 54 // "llvm.loopunroll.enable" is true). This prevents unexpected 55 // behavior like crashing when using this pragma on high trip count 56 // loops. 57 static const unsigned PragmaFullUnrollCountLimit = 1024; 58 59 namespace { 60 class LoopUnroll : public LoopPass { 61 public: 62 static char ID; // Pass ID, replacement for typeid 63 LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) { 64 CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); 65 CurrentCount = (C == -1) ? UnrollCount : unsigned(C); 66 CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; 67 CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R; 68 69 UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); 70 UserAllowPartial = (P != -1) || 71 (UnrollAllowPartial.getNumOccurrences() > 0); 72 UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); 73 UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); 74 75 initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); 76 } 77 78 /// A magic value for use with the Threshold parameter to indicate 79 /// that the loop unroll should be performed regardless of how much 80 /// code expansion would result. 81 static const unsigned NoThreshold = UINT_MAX; 82 83 // Threshold to use when optsize is specified (and there is no 84 // explicit -unroll-threshold). 85 static const unsigned OptSizeUnrollThreshold = 50; 86 87 // Default unroll count for loops with run-time trip count if 88 // -unroll-count is not set 89 static const unsigned UnrollRuntimeCount = 8; 90 91 unsigned CurrentCount; 92 unsigned CurrentThreshold; 93 bool CurrentAllowPartial; 94 bool CurrentRuntime; 95 bool UserCount; // CurrentCount is user-specified. 96 bool UserThreshold; // CurrentThreshold is user-specified. 97 bool UserAllowPartial; // CurrentAllowPartial is user-specified. 98 bool UserRuntime; // CurrentRuntime is user-specified. 99 100 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 101 102 /// This transformation requires natural loop information & requires that 103 /// loop preheaders be inserted into the CFG... 104 /// 105 void getAnalysisUsage(AnalysisUsage &AU) const override { 106 AU.addRequired<LoopInfo>(); 107 AU.addPreserved<LoopInfo>(); 108 AU.addRequiredID(LoopSimplifyID); 109 AU.addPreservedID(LoopSimplifyID); 110 AU.addRequiredID(LCSSAID); 111 AU.addPreservedID(LCSSAID); 112 AU.addRequired<ScalarEvolution>(); 113 AU.addPreserved<ScalarEvolution>(); 114 AU.addRequired<TargetTransformInfo>(); 115 // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. 116 // If loop unroll does not preserve dom info then LCSSA pass on next 117 // loop will receive invalid dom info. 118 // For now, recreate dom info, if loop is unrolled. 119 AU.addPreserved<DominatorTreeWrapperPass>(); 120 } 121 }; 122 } 123 124 char LoopUnroll::ID = 0; 125 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) 126 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 127 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 128 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 129 INITIALIZE_PASS_DEPENDENCY(LCSSA) 130 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 131 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) 132 133 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, 134 int Runtime) { 135 return new LoopUnroll(Threshold, Count, AllowPartial, Runtime); 136 } 137 138 Pass *llvm::createSimpleLoopUnrollPass() { 139 return llvm::createLoopUnrollPass(-1, -1, 0, 0); 140 } 141 142 /// ApproximateLoopSize - Approximate the size of the loop. 143 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, 144 bool &NotDuplicatable, 145 const TargetTransformInfo &TTI) { 146 CodeMetrics Metrics; 147 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 148 I != E; ++I) 149 Metrics.analyzeBasicBlock(*I, TTI); 150 NumCalls = Metrics.NumInlineCandidates; 151 NotDuplicatable = Metrics.notDuplicatable; 152 153 unsigned LoopSize = Metrics.NumInsts; 154 155 // Don't allow an estimate of size zero. This would allows unrolling of loops 156 // with huge iteration counts, which is a compile time problem even if it's 157 // not a problem for code quality. 158 if (LoopSize == 0) LoopSize = 1; 159 160 return LoopSize; 161 } 162 163 // Returns the value associated with the given metadata node name (for 164 // example, "llvm.loopunroll.count"). If no such named metadata node 165 // exists, then nullptr is returned. 166 static const ConstantInt *GetUnrollMetadataValue(const Loop *L, 167 StringRef Name) { 168 MDNode *LoopID = L->getLoopID(); 169 if (!LoopID) return nullptr; 170 171 // First operand should refer to the loop id itself. 172 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 173 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 174 175 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 176 const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 177 if (!MD) continue; 178 179 const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 180 if (!S) continue; 181 182 if (Name.equals(S->getString())) { 183 assert(MD->getNumOperands() == 2 && 184 "Unroll hint metadata should have two operands."); 185 return cast<ConstantInt>(MD->getOperand(1)); 186 } 187 } 188 return nullptr; 189 } 190 191 // Returns true if the loop has an unroll(enable) pragma. 192 static bool HasUnrollEnablePragma(const Loop *L) { 193 const ConstantInt *EnableValue = 194 GetUnrollMetadataValue(L, "llvm.loopunroll.enable"); 195 return (EnableValue && EnableValue->getZExtValue()); 196 return false; 197 } 198 199 // Returns true if the loop has an unroll(disable) pragma. 200 static bool HasUnrollDisablePragma(const Loop *L) { 201 const ConstantInt *EnableValue = 202 GetUnrollMetadataValue(L, "llvm.loopunroll.enable"); 203 return (EnableValue && !EnableValue->getZExtValue()); 204 return false; 205 } 206 207 // Check for unroll_count(N) pragma. If found, return true and set 208 // Count to the integer parameter of the pragma. 209 static bool HasUnrollCountPragma(const Loop *L, int &Count) { 210 const ConstantInt *CountValue = 211 GetUnrollMetadataValue(L, "llvm.loopunroll.count"); 212 if (CountValue) { 213 Count = CountValue->getZExtValue(); 214 assert(Count >= 1 && "Unroll count must be positive."); 215 return true; 216 } 217 return false; 218 } 219 220 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { 221 if (skipOptnoneFunction(L)) 222 return false; 223 224 LoopInfo *LI = &getAnalysis<LoopInfo>(); 225 ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); 226 const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); 227 228 BasicBlock *Header = L->getHeader(); 229 DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() 230 << "] Loop %" << Header->getName() << "\n"); 231 (void)Header; 232 233 TargetTransformInfo::UnrollingPreferences UP; 234 UP.Threshold = CurrentThreshold; 235 UP.OptSizeThreshold = OptSizeUnrollThreshold; 236 UP.PartialThreshold = CurrentThreshold; 237 UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; 238 UP.Count = CurrentCount; 239 UP.MaxCount = UINT_MAX; 240 UP.Partial = CurrentAllowPartial; 241 UP.Runtime = CurrentRuntime; 242 TTI.getUnrollingPreferences(L, UP); 243 244 // Determine the current unrolling threshold. While this is normally set 245 // from UnrollThreshold, it is overridden to a smaller value if the current 246 // function is marked as optimize-for-size, and the unroll threshold was 247 // not user specified. 248 unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; 249 unsigned PartialThreshold = 250 UserThreshold ? CurrentThreshold : UP.PartialThreshold; 251 if (!UserThreshold && 252 Header->getParent()->getAttributes(). 253 hasAttribute(AttributeSet::FunctionIndex, 254 Attribute::OptimizeForSize)) { 255 Threshold = UP.OptSizeThreshold; 256 PartialThreshold = UP.PartialOptSizeThreshold; 257 } 258 259 // Find trip count and trip multiple if count is not available 260 unsigned TripCount = 0; 261 unsigned TripMultiple = 1; 262 // Find "latch trip count". UnrollLoop assumes that control cannot exit 263 // via the loop latch on any iteration prior to TripCount. The loop may exit 264 // early via an earlier branch. 265 BasicBlock *LatchBlock = L->getLoopLatch(); 266 if (LatchBlock) { 267 TripCount = SE->getSmallConstantTripCount(L, LatchBlock); 268 TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); 269 } 270 271 // User-specified count (either as a command-line option or 272 // constructor parameter) has highest precedence. 273 unsigned Count = UserCount ? CurrentCount : 0; 274 275 // If there is no user-specified count, unroll pragmas have the next 276 // highest precendence. 277 if (Count == 0) { 278 if (HasUnrollDisablePragma(L)) { 279 // Loop has unroll(disable) pragma. 280 return false; 281 } 282 283 int PragmaCount; 284 if (HasUnrollCountPragma(L, PragmaCount)) { 285 if (PragmaCount == 1) { 286 // Nothing to do. 287 return false; 288 } 289 Count = PragmaCount; 290 Threshold = NoThreshold; 291 } else if (HasUnrollEnablePragma(L)) { 292 // Loop has unroll(enable) pragma without a unroll_count pragma, 293 // so unroll loop fully if possible. 294 if (TripCount == 0) { 295 DEBUG(dbgs() << " Loop has unroll(enable) pragma but loop cannot be " 296 "fully unrolled because trip count is unknown.\n"); 297 // Continue with standard heuristic unrolling. 298 } else if (TripCount > PragmaFullUnrollCountLimit) { 299 DEBUG(dbgs() << " Loop has unroll(enable) pragma but loop cannot be " 300 "fully unrolled because loop count is greater than " 301 << PragmaFullUnrollCountLimit); 302 // Continue with standard heuristic unrolling. 303 } else { 304 Count = TripCount; 305 Threshold = NoThreshold; 306 } 307 } 308 } 309 310 if (Count == 0) 311 Count = UP.Count; 312 313 bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime; 314 if (Runtime && Count == 0 && TripCount == 0) 315 Count = UnrollRuntimeCount; 316 317 if (Count == 0) { 318 // Conservative heuristic: if we know the trip count, see if we can 319 // completely unroll (subject to the threshold, checked below); otherwise 320 // try to find greatest modulo of the trip count which is still under 321 // threshold value. 322 if (TripCount == 0) 323 return false; 324 Count = TripCount; 325 } 326 327 // Enforce the threshold. 328 if (Threshold != NoThreshold && PartialThreshold != NoThreshold) { 329 unsigned NumInlineCandidates; 330 bool notDuplicatable; 331 unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, 332 notDuplicatable, TTI); 333 DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); 334 if (notDuplicatable) { 335 DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" 336 << " instructions.\n"); 337 return false; 338 } 339 if (NumInlineCandidates != 0) { 340 DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); 341 return false; 342 } 343 uint64_t Size = (uint64_t)LoopSize*Count; 344 if (TripCount != 1 && 345 (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) { 346 if (Size > Threshold) 347 DEBUG(dbgs() << " Too large to fully unroll with count: " << Count 348 << " because size: " << Size << ">" << Threshold << "\n"); 349 350 bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; 351 if (!AllowPartial && !(Runtime && TripCount == 0)) { 352 DEBUG(dbgs() << " will not try to unroll partially because " 353 << "-unroll-allow-partial not given\n"); 354 return false; 355 } 356 if (TripCount) { 357 // Reduce unroll count to be modulo of TripCount for partial unrolling 358 Count = PartialThreshold / LoopSize; 359 while (Count != 0 && TripCount%Count != 0) 360 Count--; 361 } 362 else if (Runtime) { 363 // Reduce unroll count to be a lower power-of-two value 364 while (Count != 0 && Size > PartialThreshold) { 365 Count >>= 1; 366 Size = LoopSize*Count; 367 } 368 } 369 if (Count > UP.MaxCount) 370 Count = UP.MaxCount; 371 if (Count < 2) { 372 DEBUG(dbgs() << " could not unroll partially\n"); 373 return false; 374 } 375 DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); 376 } 377 } 378 379 // Unroll the loop. 380 if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, this, &LPM)) 381 return false; 382 383 return true; 384 } 385