1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass identifies loops where we can generate the PPC branch instructions 11 // that decrement and test the count register (CTR) (bdnz and friends). 12 // 13 // The pattern that defines the induction variable can changed depending on 14 // prior optimizations. For example, the IndVarSimplify phase run by 'opt' 15 // normalizes induction variables, and the Loop Strength Reduction pass 16 // run by 'llc' may also make changes to the induction variable. 17 // 18 // Criteria for CTR loops: 19 // - Countable loops (w/ ind. var for a trip count) 20 // - Try inner-most loops first 21 // - No nested CTR loops. 22 // - No function calls in loops. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "PPC.h" 27 #include "PPCSubtarget.h" 28 #include "PPCTargetMachine.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/Statistic.h" 31 #include "llvm/Analysis/LoopInfo.h" 32 #include "llvm/Analysis/ScalarEvolutionExpander.h" 33 #include "llvm/Analysis/TargetLibraryInfo.h" 34 #include "llvm/CodeGen/TargetPassConfig.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DerivedTypes.h" 37 #include "llvm/IR/Dominators.h" 38 #include "llvm/IR/InlineAsm.h" 39 #include "llvm/IR/Instructions.h" 40 #include "llvm/IR/IntrinsicInst.h" 41 #include "llvm/IR/Module.h" 42 #include "llvm/IR/ValueHandle.h" 43 #include "llvm/PassSupport.h" 44 #include "llvm/Support/CommandLine.h" 45 #include "llvm/Support/Debug.h" 46 #include "llvm/Support/raw_ostream.h" 47 #include "llvm/Transforms/Scalar.h" 48 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 49 #include "llvm/Transforms/Utils/Local.h" 50 #include "llvm/Transforms/Utils/LoopUtils.h" 51 52 #ifndef NDEBUG 53 #include "llvm/CodeGen/MachineDominators.h" 54 #include "llvm/CodeGen/MachineFunction.h" 55 #include "llvm/CodeGen/MachineFunctionPass.h" 56 #include "llvm/CodeGen/MachineRegisterInfo.h" 57 #endif 58 59 using namespace llvm; 60 61 #define DEBUG_TYPE "ctrloops" 62 63 #ifndef NDEBUG 64 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); 65 #endif 66 67 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); 68 69 namespace llvm { 70 void initializePPCCTRLoopsPass(PassRegistry&); 71 #ifndef NDEBUG 72 void initializePPCCTRLoopsVerifyPass(PassRegistry&); 73 #endif 74 } 75 76 namespace { 77 struct PPCCTRLoops : public FunctionPass { 78 79 #ifndef NDEBUG 80 static int Counter; 81 #endif 82 83 public: 84 static char ID; 85 86 PPCCTRLoops() : FunctionPass(ID) { 87 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 88 } 89 90 bool runOnFunction(Function &F) override; 91 92 void getAnalysisUsage(AnalysisUsage &AU) const override { 93 AU.addRequired<LoopInfoWrapperPass>(); 94 AU.addPreserved<LoopInfoWrapperPass>(); 95 AU.addRequired<DominatorTreeWrapperPass>(); 96 AU.addPreserved<DominatorTreeWrapperPass>(); 97 AU.addRequired<ScalarEvolutionWrapperPass>(); 98 } 99 100 private: 101 bool mightUseCTR(BasicBlock *BB); 102 bool convertToCTRLoop(Loop *L); 103 104 private: 105 const PPCTargetMachine *TM; 106 const PPCSubtarget *STI; 107 const PPCTargetLowering *TLI; 108 const DataLayout *DL; 109 const TargetLibraryInfo *LibInfo; 110 LoopInfo *LI; 111 ScalarEvolution *SE; 112 DominatorTree *DT; 113 bool PreserveLCSSA; 114 }; 115 116 char PPCCTRLoops::ID = 0; 117 #ifndef NDEBUG 118 int PPCCTRLoops::Counter = 0; 119 #endif 120 121 #ifndef NDEBUG 122 struct PPCCTRLoopsVerify : public MachineFunctionPass { 123 public: 124 static char ID; 125 126 PPCCTRLoopsVerify() : MachineFunctionPass(ID) { 127 initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); 128 } 129 130 void getAnalysisUsage(AnalysisUsage &AU) const override { 131 AU.addRequired<MachineDominatorTree>(); 132 MachineFunctionPass::getAnalysisUsage(AU); 133 } 134 135 bool runOnMachineFunction(MachineFunction &MF) override; 136 137 private: 138 MachineDominatorTree *MDT; 139 }; 140 141 char PPCCTRLoopsVerify::ID = 0; 142 #endif // NDEBUG 143 } // end anonymous namespace 144 145 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", 146 false, false) 147 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 148 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 149 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 150 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", 151 false, false) 152 153 FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); } 154 155 #ifndef NDEBUG 156 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", 157 "PowerPC CTR Loops Verify", false, false) 158 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 159 INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", 160 "PowerPC CTR Loops Verify", false, false) 161 162 FunctionPass *llvm::createPPCCTRLoopsVerify() { 163 return new PPCCTRLoopsVerify(); 164 } 165 #endif // NDEBUG 166 167 bool PPCCTRLoops::runOnFunction(Function &F) { 168 if (skipFunction(F)) 169 return false; 170 171 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 172 if (!TPC) 173 return false; 174 175 TM = &TPC->getTM<PPCTargetMachine>(); 176 STI = TM->getSubtargetImpl(F); 177 TLI = STI->getTargetLowering(); 178 179 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 180 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 181 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 182 DL = &F.getParent()->getDataLayout(); 183 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 184 LibInfo = TLIP ? &TLIP->getTLI() : nullptr; 185 PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 186 187 bool MadeChange = false; 188 189 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); 190 I != E; ++I) { 191 Loop *L = *I; 192 if (!L->getParentLoop()) 193 MadeChange |= convertToCTRLoop(L); 194 } 195 196 return MadeChange; 197 } 198 199 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { 200 if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) 201 return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); 202 203 return false; 204 } 205 206 // Determining the address of a TLS variable results in a function call in 207 // certain TLS models. 208 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) { 209 const auto *GV = dyn_cast<GlobalValue>(MemAddr); 210 if (!GV) { 211 // Recurse to check for constants that refer to TLS global variables. 212 if (const auto *CV = dyn_cast<Constant>(MemAddr)) 213 for (const auto &CO : CV->operands()) 214 if (memAddrUsesCTR(TM, CO)) 215 return true; 216 217 return false; 218 } 219 220 if (!GV->isThreadLocal()) 221 return false; 222 TLSModel::Model Model = TM.getTLSModel(GV); 223 return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; 224 } 225 226 // Loop through the inline asm constraints and look for something that clobbers 227 // ctr. 228 static bool asmClobbersCTR(InlineAsm *IA) { 229 InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); 230 for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { 231 InlineAsm::ConstraintInfo &C = CIV[i]; 232 if (C.Type != InlineAsm::isInput) 233 for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) 234 if (StringRef(C.Codes[j]).equals_lower("{ctr}")) 235 return true; 236 } 237 return false; 238 } 239 240 bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { 241 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); 242 J != JE; ++J) { 243 if (CallInst *CI = dyn_cast<CallInst>(J)) { 244 // Inline ASM is okay, unless it clobbers the ctr register. 245 if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { 246 if (asmClobbersCTR(IA)) 247 return true; 248 continue; 249 } 250 251 if (Function *F = CI->getCalledFunction()) { 252 // Most intrinsics don't become function calls, but some might. 253 // sin, cos, exp and log are always calls. 254 unsigned Opcode = 0; 255 if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { 256 switch (F->getIntrinsicID()) { 257 default: continue; 258 // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr 259 // we're definitely using CTR. 260 case Intrinsic::ppc_is_decremented_ctr_nonzero: 261 case Intrinsic::ppc_mtctr: 262 return true; 263 264 // VisualStudio defines setjmp as _setjmp 265 #if defined(_MSC_VER) && defined(setjmp) && \ 266 !defined(setjmp_undefined_for_msvc) 267 # pragma push_macro("setjmp") 268 # undef setjmp 269 # define setjmp_undefined_for_msvc 270 #endif 271 272 case Intrinsic::setjmp: 273 274 #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) 275 // let's return it to _setjmp state 276 # pragma pop_macro("setjmp") 277 # undef setjmp_undefined_for_msvc 278 #endif 279 280 case Intrinsic::longjmp: 281 282 // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp 283 // because, although it does clobber the counter register, the 284 // control can't then return to inside the loop unless there is also 285 // an eh_sjlj_setjmp. 286 case Intrinsic::eh_sjlj_setjmp: 287 288 case Intrinsic::memcpy: 289 case Intrinsic::memmove: 290 case Intrinsic::memset: 291 case Intrinsic::powi: 292 case Intrinsic::log: 293 case Intrinsic::log2: 294 case Intrinsic::log10: 295 case Intrinsic::exp: 296 case Intrinsic::exp2: 297 case Intrinsic::pow: 298 case Intrinsic::sin: 299 case Intrinsic::cos: 300 return true; 301 case Intrinsic::copysign: 302 if (CI->getArgOperand(0)->getType()->getScalarType()-> 303 isPPC_FP128Ty()) 304 return true; 305 else 306 continue; // ISD::FCOPYSIGN is never a library call. 307 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; 308 case Intrinsic::floor: Opcode = ISD::FFLOOR; break; 309 case Intrinsic::ceil: Opcode = ISD::FCEIL; break; 310 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; 311 case Intrinsic::rint: Opcode = ISD::FRINT; break; 312 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; 313 case Intrinsic::round: Opcode = ISD::FROUND; break; 314 case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; 315 case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; 316 case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; 317 case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; 318 } 319 } 320 321 // PowerPC does not use [US]DIVREM or other library calls for 322 // operations on regular types which are not otherwise library calls 323 // (i.e. soft float or atomics). If adapting for targets that do, 324 // additional care is required here. 325 326 LibFunc Func; 327 if (!F->hasLocalLinkage() && F->hasName() && LibInfo && 328 LibInfo->getLibFunc(F->getName(), Func) && 329 LibInfo->hasOptimizedCodeGen(Func)) { 330 // Non-read-only functions are never treated as intrinsics. 331 if (!CI->onlyReadsMemory()) 332 return true; 333 334 // Conversion happens only for FP calls. 335 if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) 336 return true; 337 338 switch (Func) { 339 default: return true; 340 case LibFunc_copysign: 341 case LibFunc_copysignf: 342 continue; // ISD::FCOPYSIGN is never a library call. 343 case LibFunc_copysignl: 344 return true; 345 case LibFunc_fabs: 346 case LibFunc_fabsf: 347 case LibFunc_fabsl: 348 continue; // ISD::FABS is never a library call. 349 case LibFunc_sqrt: 350 case LibFunc_sqrtf: 351 case LibFunc_sqrtl: 352 Opcode = ISD::FSQRT; break; 353 case LibFunc_floor: 354 case LibFunc_floorf: 355 case LibFunc_floorl: 356 Opcode = ISD::FFLOOR; break; 357 case LibFunc_nearbyint: 358 case LibFunc_nearbyintf: 359 case LibFunc_nearbyintl: 360 Opcode = ISD::FNEARBYINT; break; 361 case LibFunc_ceil: 362 case LibFunc_ceilf: 363 case LibFunc_ceill: 364 Opcode = ISD::FCEIL; break; 365 case LibFunc_rint: 366 case LibFunc_rintf: 367 case LibFunc_rintl: 368 Opcode = ISD::FRINT; break; 369 case LibFunc_round: 370 case LibFunc_roundf: 371 case LibFunc_roundl: 372 Opcode = ISD::FROUND; break; 373 case LibFunc_trunc: 374 case LibFunc_truncf: 375 case LibFunc_truncl: 376 Opcode = ISD::FTRUNC; break; 377 case LibFunc_fmin: 378 case LibFunc_fminf: 379 case LibFunc_fminl: 380 Opcode = ISD::FMINNUM; break; 381 case LibFunc_fmax: 382 case LibFunc_fmaxf: 383 case LibFunc_fmaxl: 384 Opcode = ISD::FMAXNUM; break; 385 } 386 } 387 388 if (Opcode) { 389 MVT VTy = TLI->getSimpleValueType( 390 *DL, CI->getArgOperand(0)->getType(), true); 391 if (VTy == MVT::Other) 392 return true; 393 394 if (TLI->isOperationLegalOrCustom(Opcode, VTy)) 395 continue; 396 else if (VTy.isVector() && 397 TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) 398 continue; 399 400 return true; 401 } 402 } 403 404 return true; 405 } else if (isa<BinaryOperator>(J) && 406 J->getType()->getScalarType()->isPPC_FP128Ty()) { 407 // Most operations on ppc_f128 values become calls. 408 return true; 409 } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || 410 isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { 411 CastInst *CI = cast<CastInst>(J); 412 if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || 413 CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || 414 isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) || 415 isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType())) 416 return true; 417 } else if (isLargeIntegerTy(!TM->isPPC64(), 418 J->getType()->getScalarType()) && 419 (J->getOpcode() == Instruction::UDiv || 420 J->getOpcode() == Instruction::SDiv || 421 J->getOpcode() == Instruction::URem || 422 J->getOpcode() == Instruction::SRem)) { 423 return true; 424 } else if (!TM->isPPC64() && 425 isLargeIntegerTy(false, J->getType()->getScalarType()) && 426 (J->getOpcode() == Instruction::Shl || 427 J->getOpcode() == Instruction::AShr || 428 J->getOpcode() == Instruction::LShr)) { 429 // Only on PPC32, for 128-bit integers (specifically not 64-bit 430 // integers), these might be runtime calls. 431 return true; 432 } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { 433 // On PowerPC, indirect jumps use the counter register. 434 return true; 435 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { 436 if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) 437 return true; 438 } 439 440 if (STI->useSoftFloat()) { 441 switch(J->getOpcode()) { 442 case Instruction::FAdd: 443 case Instruction::FSub: 444 case Instruction::FMul: 445 case Instruction::FDiv: 446 case Instruction::FRem: 447 case Instruction::FPTrunc: 448 case Instruction::FPExt: 449 case Instruction::FPToUI: 450 case Instruction::FPToSI: 451 case Instruction::UIToFP: 452 case Instruction::SIToFP: 453 case Instruction::FCmp: 454 return true; 455 } 456 } 457 458 for (Value *Operand : J->operands()) 459 if (memAddrUsesCTR(*TM, Operand)) 460 return true; 461 } 462 463 return false; 464 } 465 466 bool PPCCTRLoops::convertToCTRLoop(Loop *L) { 467 bool MadeChange = false; 468 469 // Process nested loops first. 470 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { 471 MadeChange |= convertToCTRLoop(*I); 472 DEBUG(dbgs() << "Nested loop converted\n"); 473 } 474 475 // If a nested loop has been converted, then we can't convert this loop. 476 if (MadeChange) 477 return MadeChange; 478 479 #ifndef NDEBUG 480 // Stop trying after reaching the limit (if any). 481 int Limit = CTRLoopLimit; 482 if (Limit >= 0) { 483 if (Counter >= CTRLoopLimit) 484 return false; 485 Counter++; 486 } 487 #endif 488 489 // We don't want to spill/restore the counter register, and so we don't 490 // want to use the counter register if the loop contains calls. 491 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 492 I != IE; ++I) 493 if (mightUseCTR(*I)) 494 return MadeChange; 495 496 SmallVector<BasicBlock*, 4> ExitingBlocks; 497 L->getExitingBlocks(ExitingBlocks); 498 499 BasicBlock *CountedExitBlock = nullptr; 500 const SCEV *ExitCount = nullptr; 501 BranchInst *CountedExitBranch = nullptr; 502 for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), 503 IE = ExitingBlocks.end(); I != IE; ++I) { 504 const SCEV *EC = SE->getExitCount(L, *I); 505 DEBUG(dbgs() << "Exit Count for " << *L << " from block " << 506 (*I)->getName() << ": " << *EC << "\n"); 507 if (isa<SCEVCouldNotCompute>(EC)) 508 continue; 509 if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { 510 if (ConstEC->getValue()->isZero()) 511 continue; 512 } else if (!SE->isLoopInvariant(EC, L)) 513 continue; 514 515 if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) 516 continue; 517 518 // We now have a loop-invariant count of loop iterations (which is not the 519 // constant zero) for which we know that this loop will not exit via this 520 // exisiting block. 521 522 // We need to make sure that this block will run on every loop iteration. 523 // For this to be true, we must dominate all blocks with backedges. Such 524 // blocks are in-loop predecessors to the header block. 525 bool NotAlways = false; 526 for (pred_iterator PI = pred_begin(L->getHeader()), 527 PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { 528 if (!L->contains(*PI)) 529 continue; 530 531 if (!DT->dominates(*I, *PI)) { 532 NotAlways = true; 533 break; 534 } 535 } 536 537 if (NotAlways) 538 continue; 539 540 // Make sure this blocks ends with a conditional branch. 541 Instruction *TI = (*I)->getTerminator(); 542 if (!TI) 543 continue; 544 545 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 546 if (!BI->isConditional()) 547 continue; 548 549 CountedExitBranch = BI; 550 } else 551 continue; 552 553 // Note that this block may not be the loop latch block, even if the loop 554 // has a latch block. 555 CountedExitBlock = *I; 556 ExitCount = EC; 557 break; 558 } 559 560 if (!CountedExitBlock) 561 return MadeChange; 562 563 BasicBlock *Preheader = L->getLoopPreheader(); 564 565 // If we don't have a preheader, then insert one. If we already have a 566 // preheader, then we can use it (except if the preheader contains a use of 567 // the CTR register because some such uses might be reordered by the 568 // selection DAG after the mtctr instruction). 569 if (!Preheader || mightUseCTR(Preheader)) 570 Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); 571 if (!Preheader) 572 return MadeChange; 573 574 DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); 575 576 // Insert the count into the preheader and replace the condition used by the 577 // selected branch. 578 MadeChange = true; 579 580 SCEVExpander SCEVE(*SE, *DL, "loopcnt"); 581 LLVMContext &C = SE->getContext(); 582 Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); 583 if (!ExitCount->getType()->isPointerTy() && 584 ExitCount->getType() != CountType) 585 ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); 586 ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); 587 Value *ECValue = 588 SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); 589 590 IRBuilder<> CountBuilder(Preheader->getTerminator()); 591 Module *M = Preheader->getParent()->getParent(); 592 Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, 593 CountType); 594 CountBuilder.CreateCall(MTCTRFunc, ECValue); 595 596 IRBuilder<> CondBuilder(CountedExitBranch); 597 Value *DecFunc = 598 Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); 599 Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); 600 Value *OldCond = CountedExitBranch->getCondition(); 601 CountedExitBranch->setCondition(NewCond); 602 603 // The false branch must exit the loop. 604 if (!L->contains(CountedExitBranch->getSuccessor(0))) 605 CountedExitBranch->swapSuccessors(); 606 607 // The old condition may be dead now, and may have even created a dead PHI 608 // (the original induction variable). 609 RecursivelyDeleteTriviallyDeadInstructions(OldCond); 610 // Run through the basic blocks of the loop and see if any of them have dead 611 // PHIs that can be removed. 612 for (auto I : L->blocks()) 613 DeleteDeadPHIs(I); 614 615 ++NumCTRLoops; 616 return MadeChange; 617 } 618 619 #ifndef NDEBUG 620 static bool clobbersCTR(const MachineInstr &MI) { 621 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 622 const MachineOperand &MO = MI.getOperand(i); 623 if (MO.isReg()) { 624 if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) 625 return true; 626 } else if (MO.isRegMask()) { 627 if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8)) 628 return true; 629 } 630 } 631 632 return false; 633 } 634 635 static bool verifyCTRBranch(MachineBasicBlock *MBB, 636 MachineBasicBlock::iterator I) { 637 MachineBasicBlock::iterator BI = I; 638 SmallSet<MachineBasicBlock *, 16> Visited; 639 SmallVector<MachineBasicBlock *, 8> Preds; 640 bool CheckPreds; 641 642 if (I == MBB->begin()) { 643 Visited.insert(MBB); 644 goto queue_preds; 645 } else 646 --I; 647 648 check_block: 649 Visited.insert(MBB); 650 if (I == MBB->end()) 651 goto queue_preds; 652 653 CheckPreds = true; 654 for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { 655 unsigned Opc = I->getOpcode(); 656 if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { 657 CheckPreds = false; 658 break; 659 } 660 661 if (I != BI && clobbersCTR(*I)) { 662 DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" << 663 MBB->getFullName() << ") instruction " << *I << 664 " clobbers CTR, invalidating " << "BB#" << 665 BI->getParent()->getNumber() << " (" << 666 BI->getParent()->getFullName() << ") instruction " << 667 *BI << "\n"); 668 return false; 669 } 670 671 if (I == IE) 672 break; 673 } 674 675 if (!CheckPreds && Preds.empty()) 676 return true; 677 678 if (CheckPreds) { 679 queue_preds: 680 if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { 681 DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" << 682 BI->getParent()->getNumber() << " (" << 683 BI->getParent()->getFullName() << ") instruction " << 684 *BI << "\n"); 685 return false; 686 } 687 688 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 689 PIE = MBB->pred_end(); PI != PIE; ++PI) 690 Preds.push_back(*PI); 691 } 692 693 do { 694 MBB = Preds.pop_back_val(); 695 if (!Visited.count(MBB)) { 696 I = MBB->getLastNonDebugInstr(); 697 goto check_block; 698 } 699 } while (!Preds.empty()); 700 701 return true; 702 } 703 704 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { 705 MDT = &getAnalysis<MachineDominatorTree>(); 706 707 // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before 708 // any other instructions that might clobber the ctr register. 709 for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); 710 I != IE; ++I) { 711 MachineBasicBlock *MBB = &*I; 712 if (!MDT->isReachableFromEntry(MBB)) 713 continue; 714 715 for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), 716 MIIE = MBB->end(); MII != MIIE; ++MII) { 717 unsigned Opc = MII->getOpcode(); 718 if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || 719 Opc == PPC::BDZ8 || Opc == PPC::BDZ) 720 if (!verifyCTRBranch(MBB, MII)) 721 llvm_unreachable("Invalid PPC CTR loop!"); 722 } 723 } 724 725 return false; 726 } 727 #endif // NDEBUG 728