//===-- PPCCTRLoops.cpp - Identify and generate CTR loops ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass identifies loops where we can generate the PPC branch instructions
// that decrement and test the count register (CTR) (bdnz and friends).
//
// The pattern that defines the induction variable can change depending on
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
// normalizes induction variables, and the Loop Strength Reduction pass
// run by 'llc' may also make changes to the induction variable.
//
// Criteria for CTR loops:
//  - Countable loops (w/ ind. var for a trip count)
//  - Try inner-most loops first
//  - No nested CTR loops.
//  - No function calls in loops.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#endif

using namespace llvm;

#define DEBUG_TYPE "ctrloops"

#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
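// In outline, convertToCTRLoop() below rewrites a qualifying loop roughly as
// follows (shown schematically; the actual IR depends on the trip-count
// expression that SCEV produces):
//
//   preheader:
//     call void @llvm.ppc.mtctr.i64(i64 %count)   ; count = exit count + 1
//     br label %header
//   ...
//   exiting:
//     %cond = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
//     br i1 %cond, label %header, label %exit
//
// Instruction selection later turns these intrinsics into mtctr and a
// decrement-and-branch (bdnz) instruction.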
// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");

namespace llvm {
  void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
}

namespace {
  struct PPCCTRLoops : public FunctionPass {

#ifndef NDEBUG
    static int Counter;
#endif

  public:
    static char ID;

    PPCCTRLoops() : FunctionPass(ID) {
      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }

  private:
    bool mightUseCTR(BasicBlock *BB);
    bool convertToCTRLoop(Loop *L);

  private:
    const PPCTargetMachine *TM;
    const PPCSubtarget *STI;
    const PPCTargetLowering *TLI;
    const DataLayout *DL;
    const TargetLibraryInfo *LibInfo;
    const TargetTransformInfo *TTI;
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    bool PreserveLCSSA;
    TargetSchedModel SchedModel;
  };

  char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
  int PPCCTRLoops::Counter = 0;
#endif

#ifndef NDEBUG
  struct PPCCTRLoopsVerify : public MachineFunctionPass {
  public:
    static char ID;

    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    MachineDominatorTree *MDT;
  };

  char PPCCTRLoopsVerify::ID = 0;
#endif // NDEBUG
} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)

FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }

#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
#endif // NDEBUG

bool PPCCTRLoops::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  TM = &TPC->getTM<PPCTargetMachine>();
  STI = TM->getSubtargetImpl(F);
  TLI = STI->getTargetLowering();
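  // Cache the remaining analyses. TargetLibraryInfo is only optionally
  // available; without it, mightUseCTR() cannot prove any external call
  // CTR-safe and so treats every such call as blocking the transformation.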
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DL = &F.getParent()->getDataLayout();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

  bool MadeChange = false;

  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
       I != E; ++I) {
    Loop *L = *I;
    if (!L->getParentLoop())
      MadeChange |= convertToCTRLoop(L);
  }

  return MadeChange;
}

static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

  return false;
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(TM, CO))
          return true;

    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}

// Loop through the inline asm constraints and look for something that clobbers
// ctr.
static bool asmClobbersCTR(InlineAsm *IA) {
  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
    InlineAsm::ConstraintInfo &C = CIV[i];
    if (C.Type != InlineAsm::isInput)
      for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
        if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
          return true;
  }
  return false;
}

bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
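        // When a case below sets Opcode, the call is expected to lower to
        // that ISD node; the legality check near the end of this block then
        // decides whether it becomes a native operation or a run-time
        // library call (and a real call means we cannot use the CTR).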
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
    !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
// Restore setjmp to its previous (_setjmp) state.
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, control
          // can't then return to inside the loop unless there is also an
          // eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:     Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:    Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:    Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types that are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
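          // Each remaining case maps the library function to the ISD opcode
          // it would lower to; the legality check after the switch decides
          // whether that lowering is a native instruction or a real call.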
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_fp128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }
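    // Under soft-float, all floating-point operations and comparisons are
    // lowered to run-time library calls, so any FP instruction rules out
    // using the CTR for this loop.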
    if (STI->useSoftFloat()) {
      switch (J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FRem:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}

bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert loops with a small constant trip count to CTR loops.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    if (convertToCTRLoop(*I)) {
      MadeChange = true;
      DEBUG(dbgs() << "Nested loop converted\n");
    }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= Limit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }
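  // Look for an exiting block with a computable, loop-invariant exit count
  // that executes on every iteration; its conditional branch is the one we
  // will rewrite to test the decremented CTR.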
  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (unless the preheader contains a use of
  // the CTR register, because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
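  // The CTR is 64 bits wide on PPC64 and 32 bits wide on PPC32; compute the
  // count in the matching width. mtctr must be given the trip count, which
  // is the exit count plus one.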
  Type *CountType = TM->isPPC64() ?
    Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
                                               CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}

#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg()) {
      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
        return true;
    } else if (MO.isRegMask()) {
      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
        return true;
    }
  }

  return false;
}

static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16> Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(*I)) {
      DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
                   << ") instruction " << *I << " clobbers CTR, invalidating "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }
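  // Resume the backward scan in each not-yet-visited predecessor; together
  // with the check_block and queue_preds labels this implements an
  // iterative depth-first search for the defining MTCTR instruction.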
  do {
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}

bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
  MDT = &getAnalysis<MachineDominatorTree>();

  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
  // any other instructions that might clobber the ctr register.
  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
       I != IE; ++I) {
    MachineBasicBlock *MBB = &*I;
    if (!MDT->isReachableFromEntry(MBB))
      continue;

    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
         MIIE = MBB->end(); MII != MIIE; ++MII) {
      unsigned Opc = MII->getOpcode();
      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
        if (!verifyCTRBranch(MBB, MII))
          llvm_unreachable("Invalid PPC CTR loop!");
    }
  }

  return false;
}
#endif // NDEBUG