1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass generates machine instructions for the CTR loops related pseudos: 10 // 1: MTCTRloop/DecreaseCTRloop 11 // 2: MTCTR8loop/DecreaseCTR8loop 12 // 13 // If a CTR loop can be generated: 14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr" 15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and 16 // its user branch instruction can be deleted. 17 // 18 // If a CTR loop can not be generated due to clobber of CTR: 19 // 1: MTCTRloop/MTCTR8loop can be deleted. 20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and 21 // a "cmplwi/cmpldi". 22 // 23 // This pass runs just before register allocation, because we don't want 24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be 25 // generated or if a CTR loop can not be generated, we don't have any condition 26 // register for the new added "cmplwi/cmpldi". 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "PPC.h" 31 #include "PPCInstrInfo.h" 32 #include "PPCSubtarget.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/CodeGen/MachineBasicBlock.h" 35 #include "llvm/CodeGen/MachineFunction.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstr.h" 38 #include "llvm/CodeGen/MachineLoopInfo.h" 39 #include "llvm/CodeGen/MachineOperand.h" 40 #include "llvm/CodeGen/MachineRegisterInfo.h" 41 #include "llvm/CodeGen/Register.h" 42 #include "llvm/InitializePasses.h" 43 #include "llvm/Pass.h" 44 #include "llvm/PassRegistry.h" 45 #include "llvm/Support/CodeGen.h" 46 #include "llvm/Support/Debug.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include <cassert> 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "ppc-ctrloops" 53 54 STATISTIC(NumCTRLoops, "Number of CTR loops generated"); 55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated"); 56 57 namespace { 58 class PPCCTRLoops : public MachineFunctionPass { 59 public: 60 static char ID; 61 62 PPCCTRLoops() : MachineFunctionPass(ID) { 63 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 64 } 65 66 void getAnalysisUsage(AnalysisUsage &AU) const override { 67 AU.addRequired<MachineLoopInfo>(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70 71 bool runOnMachineFunction(MachineFunction &MF) override; 72 73 private: 74 const PPCInstrInfo *TII = nullptr; 75 MachineRegisterInfo *MRI = nullptr; 76 77 bool processLoop(MachineLoop *ML); 78 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const; 79 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 80 MachineInstr *Dec); 81 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec); 82 }; 83 } // namespace 84 85 char PPCCTRLoops::ID = 0; 86 87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 88 false, false) 89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 91 false, false) 92 93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); } 94 95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { 96 bool Changed = false; 97 98 auto &MLI = getAnalysis<MachineLoopInfo>(); 99 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 100 MRI = &MF.getRegInfo(); 101 102 for (auto *ML : MLI) { 103 if (ML->isOutermost()) 104 Changed |= processLoop(ML); 105 } 106 107 return Changed; 108 } 109 110 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const { 111 if (!CheckReads) { 112 // If we are only checking for defs, that is we are going to find 113 // definitions before MTCTRloop, for this case: 114 // CTR defination inside the callee of a call instruction will not impact 115 // the defination of MTCTRloop, so we can use definesRegister() for the 116 // check, no need to check the regmask. 117 return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8); 118 } 119 120 if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8)) 121 return true; 122 123 if (MI->getDesc().isCall()) 124 return true; 125 126 // We define the CTR in the loop preheader, so if there is any CTR reader in 127 // the loop, we also can not use CTR loop form. 128 if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8)) 129 return true; 130 131 return false; 132 } 133 134 bool PPCCTRLoops::processLoop(MachineLoop *ML) { 135 bool Changed = false; 136 137 // Align with HardwareLoop pass, process inner loops first. 138 for (MachineLoop *I : *ML) 139 Changed |= processLoop(I); 140 141 // If any inner loop is changed, outter loop must be without hardware loop 142 // intrinsics. 143 if (Changed) 144 return true; 145 146 auto IsLoopStart = [](MachineInstr &MI) { 147 return MI.getOpcode() == PPC::MTCTRloop || 148 MI.getOpcode() == PPC::MTCTR8loop; 149 }; 150 151 auto SearchForStart = 152 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * { 153 for (auto &MI : *MBB) { 154 if (IsLoopStart(MI)) 155 return &MI; 156 } 157 return nullptr; 158 }; 159 160 MachineInstr *Start = nullptr; 161 MachineInstr *Dec = nullptr; 162 bool InvalidCTRLoop = false; 163 164 MachineBasicBlock *Preheader = ML->getLoopPreheader(); 165 // If there is no preheader for this loop, there must be no MTCTRloop 166 // either. 167 if (!Preheader) 168 return false; 169 170 Start = SearchForStart(Preheader); 171 // This is not a CTR loop candidate. 172 if (!Start) 173 return false; 174 175 // If CTR is live to the preheader, we can not redefine the CTR register. 176 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8)) 177 InvalidCTRLoop = true; 178 179 // Make sure there is also no CTR clobber in the block preheader between the 180 // begin and MTCTR. 181 for (MachineBasicBlock::reverse_instr_iterator I = 182 std::next(Start->getReverseIterator()); 183 I != Preheader->instr_rend(); ++I) 184 // Only check the definitions of CTR. If there is non-dead definition for 185 // the CTR, we conservatively don't generate a CTR loop. 186 if (isCTRClobber(&*I, /* CheckReads */ false)) { 187 InvalidCTRLoop = true; 188 break; 189 } 190 191 // Make sure there is also no CTR clobber/user in the block preheader between 192 // MTCTR and the end. 193 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator()); 194 I != Preheader->instr_end(); ++I) 195 if (isCTRClobber(&*I, /* CheckReads */ true)) { 196 InvalidCTRLoop = true; 197 break; 198 } 199 200 // Find the CTR loop components and decide whether or not to fall back to a 201 // normal loop. 202 for (auto *MBB : reverse(ML->getBlocks())) { 203 for (auto &MI : *MBB) { 204 if (MI.getOpcode() == PPC::DecreaseCTRloop || 205 MI.getOpcode() == PPC::DecreaseCTR8loop) 206 Dec = &MI; 207 else if (!InvalidCTRLoop) 208 // If any instruction clobber CTR, then we can not generate a CTR loop. 209 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true); 210 } 211 if (Dec && InvalidCTRLoop) 212 break; 213 } 214 215 assert(Dec && "CTR loop is not complete!"); 216 217 if (InvalidCTRLoop) { 218 expandNormalLoops(ML, Start, Dec); 219 ++NumNormalLoops; 220 } 221 else { 222 expandCTRLoops(ML, Start, Dec); 223 ++NumCTRLoops; 224 } 225 return true; 226 } 227 228 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 229 MachineInstr *Dec) { 230 bool Is64Bit = 231 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 232 233 MachineBasicBlock *Preheader = Start->getParent(); 234 MachineBasicBlock *Exiting = Dec->getParent(); 235 assert((Preheader && Exiting) && 236 "Preheader and exiting should exist for CTR loop!"); 237 238 assert(Dec->getOperand(1).getImm() == 1 && 239 "Loop decrement stride must be 1"); 240 241 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI; 242 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI; 243 244 Register PHIDef = 245 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 246 : &PPC::GPRC_and_GPRC_NOR0RegClass); 247 248 Start->getParent()->getParent()->getProperties().reset( 249 MachineFunctionProperties::Property::NoPHIs); 250 251 // Generate "PHI" in the header block. 252 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), 253 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef); 254 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader); 255 256 Register ADDIDef = 257 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 258 : &PPC::GPRC_and_GPRC_NOR0RegClass); 259 // Generate "addi -1" in the exiting block. 260 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef) 261 .addReg(PHIDef) 262 .addImm(-1); 263 264 // Add other inputs for the PHI node. 265 if (ML->isLoopLatch(Exiting)) { 266 // There must be only two predecessors for the loop header, one is the 267 // Preheader and the other one is loop latch Exiting. In hardware loop 268 // insertion pass, the block containing DecreaseCTRloop must dominate all 269 // loop latches. So there must be only one latch. 270 assert(ML->getHeader()->pred_size() == 2 && 271 "Loop header predecessor is not right!"); 272 PHIMIB.addReg(ADDIDef).addMBB(Exiting); 273 } else { 274 // If the block containing DecreaseCTRloop is not a loop latch, we can use 275 // ADDIDef as the value for all other blocks for the PHI. In hardware loop 276 // insertion pass, the block containing DecreaseCTRloop must dominate all 277 // loop latches. 278 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { 279 if (ML->contains(P)) { 280 assert(ML->isLoopLatch(P) && 281 "Loop's header in-loop predecessor is not loop latch!"); 282 PHIMIB.addReg(ADDIDef).addMBB(P); 283 } else 284 assert(P == Preheader && 285 "CTR loop should not be generated for irreducible loop!"); 286 } 287 } 288 289 // Generate the compare in the exiting block. 290 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass); 291 auto CMPMIB = 292 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef) 293 .addReg(ADDIDef) 294 .addImm(0); 295 296 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY), 297 Dec->getOperand(0).getReg()) 298 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt); 299 300 // Remove the pseudo instructions. 301 Start->eraseFromParent(); 302 Dec->eraseFromParent(); 303 } 304 305 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start, 306 MachineInstr *Dec) { 307 bool Is64Bit = 308 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 309 310 MachineBasicBlock *Preheader = Start->getParent(); 311 MachineBasicBlock *Exiting = Dec->getParent(); 312 313 (void)Preheader; 314 assert((Preheader && Exiting) && 315 "Preheader and exiting should exist for CTR loop!"); 316 317 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!"); 318 319 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ; 320 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ; 321 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg()); 322 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) && 323 "There should be only one user for loop decrement pseudo!"); 324 325 unsigned Opcode = 0; 326 switch (BrInstr->getOpcode()) { 327 case PPC::BC: 328 Opcode = BDNZOpcode; 329 (void) ML; 330 assert(ML->contains(BrInstr->getOperand(1).getMBB()) && 331 "Invalid ctr loop!"); 332 break; 333 case PPC::BCn: 334 Opcode = BDZOpcode; 335 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) && 336 "Invalid ctr loop!"); 337 break; 338 default: 339 llvm_unreachable("Unhandled branch user for DecreaseCTRloop."); 340 } 341 342 // Generate "bdnz/bdz" in the exiting block just before the terminator. 343 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode)) 344 .addMBB(BrInstr->getOperand(1).getMBB()); 345 346 // Remove the pseudo instructions. 347 BrInstr->eraseFromParent(); 348 Dec->eraseFromParent(); 349 } 350