//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass identifies loops where we can generate the PPC branch instructions
// that decrement and test the count register (CTR) (bdnz and friends).
//
// The pattern that defines the induction variable can change depending on
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
// normalizes induction variables, and the Loop Strength Reduction pass
// run by 'llc' may also make changes to the induction variable.
//
// Criteria for CTR loops:
//  - Countable loops (w/ ind. var for a trip count)
//  - Try inner-most loops first
//  - No nested CTR loops.
//  - No function calls in loops.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#endif

using namespace llvm;

#define DEBUG_TYPE "ctrloops"

#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
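
// For illustration only (a sketch, not code used by this pass): a simple
// countable loop such as
//
//   for (int i = 0; i < n; ++i)
//     sum += a[i];
//
// can have its trip count moved into CTR once in the preheader (mtctr), so
// that the backedge becomes a single bdnz (decrement CTR, branch if nonzero),
// replacing the per-iteration compare-and-branch sequence.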

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");

namespace {
  struct PPCCTRLoops : public FunctionPass {

#ifndef NDEBUG
    static int Counter;
#endif

  public:
    static char ID;

    PPCCTRLoops() : FunctionPass(ID) {
      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }

  private:
    bool mightUseCTR(BasicBlock *BB);
    bool convertToCTRLoop(Loop *L);

  private:
    const PPCTargetMachine *TM;
    const PPCSubtarget *STI;
    const PPCTargetLowering *TLI;
    const DataLayout *DL;
    const TargetLibraryInfo *LibInfo;
    const TargetTransformInfo *TTI;
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    bool PreserveLCSSA;
    TargetSchedModel SchedModel;
  };

  char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
  int PPCCTRLoops::Counter = 0;
#endif

#ifndef NDEBUG
  struct PPCCTRLoopsVerify : public MachineFunctionPass {
  public:
    static char ID;

    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    MachineDominatorTree *MDT;
  };

  char PPCCTRLoopsVerify::ID = 0;
#endif // NDEBUG
} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)

FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }

#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
#endif // NDEBUG

bool PPCCTRLoops::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  TM = &TPC->getTM<PPCTargetMachine>();
  STI = TM->getSubtargetImpl(F);
  TLI = STI->getTargetLowering();

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DL = &F.getParent()->getDataLayout();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
  SchedModel.init(STI);

  bool MadeChange = false;

  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
       I != E; ++I) {
    Loop *L = *I;
    if (!L->getParentLoop())
      MadeChange |= convertToCTRLoop(L);
  }

  return MadeChange;
}

static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

  return false;
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
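// (With the general-dynamic and local-dynamic models, the address is
// computed by a call to __tls_get_addr.)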
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(TM, CO))
          return true;

    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}

// Loop through the inline asm constraints and look for something that
// clobbers ctr.
static bool asmClobbersCTR(InlineAsm *IA) {
  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
    InlineAsm::ConstraintInfo &C = CIV[i];
    if (C.Type != InlineAsm::isInput)
      for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
        if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
          return true;
  }
  return false;
}
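
// Conservatively determine whether any instruction in this block might be
// lowered to something that uses or clobbers CTR (a real function call, a
// library call materialized by codegen, an indirect branch, etc.). Being
// conservative here is safe: a false positive merely forgoes a CTR loop,
// whereas missing a clobber would corrupt the loop count at run time.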
bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero or ppc_mtctr,
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
    !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
// let's return it to _setjmp state
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
          }
        }
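
        // For the intrinsics that set Opcode above, whether a call results
        // depends on legalization: if the corresponding ISD node is legal or
        // custom for the argument type (checked below), it is lowered inline
        // and CTR is untouched; otherwise it may be expanded to a library
        // call.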

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_fp128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (STI->useSoftFloat()) {
      switch (J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}
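
// Attempt to convert this loop (and, recursively, any loops nested inside
// it) into a CTR loop: pick an eligible counted exit, materialize the trip
// count into CTR in the preheader, and rewrite the exit branch to test the
// decremented counter.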
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert small short loops to CTR loops.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
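    // (With an issue width of 4, for example, this keeps loops of up to
    // about 24 instructions on the ordinary compare-and-branch path, since
    // such short bodies would not recoup the mtctr cost.)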
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    if (convertToCTRLoop(*I)) {
      MadeChange = true;
      LLVM_DEBUG(dbgs() << "Nested loop converted\n");
    }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

  // Bail out if the loop has irreducible control flow.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
    return false;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= Limit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if ((TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
                      << (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (LI->getLoopFor(*I) != L)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
                    << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
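  // getExitCount() gave us the number of times the backedge is taken before
  // the loop exits through this block, so the iteration count placed in CTR
  // is one more than that.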
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Function *MTCTRFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Function *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}

#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg()) {
      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
        return true;
    } else if (MO.isRegMask()) {
      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
        return true;
    }
  }

  return false;
}
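
// Walk backwards from a count-decrementing branch and check that the CTR
// value it consumes was set by an MTCTRloop/MTCTR8loop with no intervening
// clobber; when the start of a block is reached without finding the mtctr,
// continue the search through predecessor blocks (depth-first, with a
// visited set).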
static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16> Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(*I)) {
      LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " ("
                        << MBB->getFullName() << ") instruction " << *I
                        << " clobbers CTR, invalidating "
                        << printMBBReference(*BI->getParent()) << " ("
                        << BI->getParent()->getFullName() << ") instruction "
                        << *BI << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
                        << printMBBReference(*BI->getParent()) << " ("
                        << BI->getParent()->getFullName() << ") instruction "
                        << *BI << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }

  do {
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}

bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
  MDT = &getAnalysis<MachineDominatorTree>();

  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr
  // before any other instructions that might clobber the ctr register.
  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
       I != IE; ++I) {
    MachineBasicBlock *MBB = &*I;
    if (!MDT->isReachableFromEntry(MBB))
      continue;

    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
         MIIE = MBB->end(); MII != MIIE; ++MII) {
      unsigned Opc = MII->getOpcode();
      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
        if (!verifyCTRBranch(MBB, MII))
          llvm_unreachable("Invalid PPC CTR loop!");
    }
  }

  return false;
}
#endif // NDEBUG