1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass generates machine instructions for the CTR loops related pseudos: 10 // 1: MTCTRloop/DecreaseCTRloop 11 // 2: MTCTR8loop/DecreaseCTR8loop 12 // 13 // If a CTR loop can be generated: 14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr" 15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and 16 // its user branch instruction can be deleted. 17 // 18 // If a CTR loop can not be generated due to clobber of CTR: 19 // 1: MTCTRloop/MTCTR8loop can be deleted. 20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and 21 // a "cmplwi/cmpldi". 22 // 23 // This pass runs just before register allocation, because we don't want 24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be 25 // generated or if a CTR loop can not be generated, we don't have any condition 26 // register for the new added "cmplwi/cmpldi". 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "PPC.h" 31 #include "PPCInstrInfo.h" 32 #include "PPCSubtarget.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/CodeGen/MachineBasicBlock.h" 35 #include "llvm/CodeGen/MachineFunction.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstr.h" 38 #include "llvm/CodeGen/MachineLoopInfo.h" 39 #include "llvm/CodeGen/MachineOperand.h" 40 #include "llvm/CodeGen/MachineRegisterInfo.h" 41 #include "llvm/CodeGen/Register.h" 42 #include "llvm/InitializePasses.h" 43 #include "llvm/Pass.h" 44 #include "llvm/PassRegistry.h" 45 #include "llvm/Support/ErrorHandling.h" 46 #include <cassert> 47 48 using namespace llvm; 49 50 #define DEBUG_TYPE "ppc-ctrloops" 51 52 STATISTIC(NumCTRLoops, "Number of CTR loops generated"); 53 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated"); 54 55 namespace { 56 class PPCCTRLoops : public MachineFunctionPass { 57 public: 58 static char ID; 59 60 PPCCTRLoops() : MachineFunctionPass(ID) { 61 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 62 } 63 64 void getAnalysisUsage(AnalysisUsage &AU) const override { 65 AU.addRequired<MachineLoopInfoWrapperPass>(); 66 MachineFunctionPass::getAnalysisUsage(AU); 67 } 68 69 bool runOnMachineFunction(MachineFunction &MF) override; 70 71 private: 72 const PPCInstrInfo *TII = nullptr; 73 MachineRegisterInfo *MRI = nullptr; 74 75 bool processLoop(MachineLoop *ML); 76 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const; 77 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 78 MachineInstr *Dec); 79 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec); 80 }; 81 } // namespace 82 83 char PPCCTRLoops::ID = 0; 84 85 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 86 false, false) 87 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 88 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 89 false, false) 90 91 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); } 92 93 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { 94 bool Changed = false; 95 96 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 97 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 98 MRI = &MF.getRegInfo(); 99 100 for (auto *ML : MLI) { 101 if (ML->isOutermost()) 102 Changed |= processLoop(ML); 103 } 104 105 #ifndef NDEBUG 106 for (const MachineBasicBlock &BB : MF) { 107 for (const MachineInstr &I : BB) 108 assert((I.getOpcode() != PPC::DecreaseCTRloop && 109 I.getOpcode() != PPC::DecreaseCTR8loop) && 110 "CTR loop pseudo is not expanded!"); 111 } 112 #endif 113 114 return Changed; 115 } 116 117 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const { 118 if (!CheckReads) { 119 // If we are only checking for defs, that is we are going to find 120 // definitions before MTCTRloop, for this case: 121 // CTR defination inside the callee of a call instruction will not impact 122 // the defination of MTCTRloop, so we can use definesRegister() for the 123 // check, no need to check the regmask. 124 return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) || 125 MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr); 126 } 127 128 if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) || 129 MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr)) 130 return true; 131 132 if (MI->getDesc().isCall()) 133 return true; 134 135 // We define the CTR in the loop preheader, so if there is any CTR reader in 136 // the loop, we also can not use CTR loop form. 137 if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) || 138 MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr)) 139 return true; 140 141 return false; 142 } 143 144 bool PPCCTRLoops::processLoop(MachineLoop *ML) { 145 bool Changed = false; 146 147 // Align with HardwareLoop pass, process inner loops first. 148 for (MachineLoop *I : *ML) 149 Changed |= processLoop(I); 150 151 // If any inner loop is changed, outter loop must be without hardware loop 152 // intrinsics. 153 if (Changed) 154 return true; 155 156 auto IsLoopStart = [](MachineInstr &MI) { 157 return MI.getOpcode() == PPC::MTCTRloop || 158 MI.getOpcode() == PPC::MTCTR8loop; 159 }; 160 161 auto SearchForStart = 162 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * { 163 for (auto &MI : *MBB) { 164 if (IsLoopStart(MI)) 165 return &MI; 166 } 167 return nullptr; 168 }; 169 170 MachineInstr *Start = nullptr; 171 MachineInstr *Dec = nullptr; 172 bool InvalidCTRLoop = false; 173 174 MachineBasicBlock *Preheader = ML->getLoopPreheader(); 175 // If there is no preheader for this loop, there must be no MTCTRloop 176 // either. 177 if (!Preheader) 178 return false; 179 180 Start = SearchForStart(Preheader); 181 // This is not a CTR loop candidate. 182 if (!Start) 183 return false; 184 185 // If CTR is live to the preheader, we can not redefine the CTR register. 186 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8)) 187 InvalidCTRLoop = true; 188 189 // Make sure there is also no CTR clobber in the block preheader between the 190 // begin and MTCTR. 191 for (MachineBasicBlock::reverse_instr_iterator I = 192 std::next(Start->getReverseIterator()); 193 I != Preheader->instr_rend(); ++I) 194 // Only check the definitions of CTR. If there is non-dead definition for 195 // the CTR, we conservatively don't generate a CTR loop. 196 if (isCTRClobber(&*I, /* CheckReads */ false)) { 197 InvalidCTRLoop = true; 198 break; 199 } 200 201 // Make sure there is also no CTR clobber/user in the block preheader between 202 // MTCTR and the end. 203 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator()); 204 I != Preheader->instr_end(); ++I) 205 if (isCTRClobber(&*I, /* CheckReads */ true)) { 206 InvalidCTRLoop = true; 207 break; 208 } 209 210 // Find the CTR loop components and decide whether or not to fall back to a 211 // normal loop. 212 for (auto *MBB : reverse(ML->getBlocks())) { 213 for (auto &MI : *MBB) { 214 if (MI.getOpcode() == PPC::DecreaseCTRloop || 215 MI.getOpcode() == PPC::DecreaseCTR8loop) 216 Dec = &MI; 217 else if (!InvalidCTRLoop) 218 // If any instruction clobber CTR, then we can not generate a CTR loop. 219 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true); 220 } 221 if (Dec && InvalidCTRLoop) 222 break; 223 } 224 225 assert(Dec && "CTR loop is not complete!"); 226 227 if (InvalidCTRLoop) { 228 expandNormalLoops(ML, Start, Dec); 229 ++NumNormalLoops; 230 } 231 else { 232 expandCTRLoops(ML, Start, Dec); 233 ++NumCTRLoops; 234 } 235 return true; 236 } 237 238 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 239 MachineInstr *Dec) { 240 bool Is64Bit = 241 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 242 243 MachineBasicBlock *Preheader = Start->getParent(); 244 MachineBasicBlock *Exiting = Dec->getParent(); 245 assert((Preheader && Exiting) && 246 "Preheader and exiting should exist for CTR loop!"); 247 248 assert(Dec->getOperand(1).getImm() == 1 && 249 "Loop decrement stride must be 1"); 250 251 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI; 252 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI; 253 254 Register PHIDef = 255 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 256 : &PPC::GPRC_and_GPRC_NOR0RegClass); 257 258 Start->getParent()->getParent()->getProperties().reset( 259 MachineFunctionProperties::Property::NoPHIs); 260 261 // Generate "PHI" in the header block. 262 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), 263 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef); 264 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader); 265 266 Register ADDIDef = 267 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 268 : &PPC::GPRC_and_GPRC_NOR0RegClass); 269 // Generate "addi -1" in the exiting block. 270 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef) 271 .addReg(PHIDef) 272 .addImm(-1); 273 274 // Add other inputs for the PHI node. 275 if (ML->isLoopLatch(Exiting)) { 276 // There must be only two predecessors for the loop header, one is the 277 // Preheader and the other one is loop latch Exiting. In hardware loop 278 // insertion pass, the block containing DecreaseCTRloop must dominate all 279 // loop latches. So there must be only one latch. 280 assert(ML->getHeader()->pred_size() == 2 && 281 "Loop header predecessor is not right!"); 282 PHIMIB.addReg(ADDIDef).addMBB(Exiting); 283 } else { 284 // If the block containing DecreaseCTRloop is not a loop latch, we can use 285 // ADDIDef as the value for all other blocks for the PHI. In hardware loop 286 // insertion pass, the block containing DecreaseCTRloop must dominate all 287 // loop latches. 288 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { 289 if (ML->contains(P)) { 290 assert(ML->isLoopLatch(P) && 291 "Loop's header in-loop predecessor is not loop latch!"); 292 PHIMIB.addReg(ADDIDef).addMBB(P); 293 } else 294 assert(P == Preheader && 295 "CTR loop should not be generated for irreducible loop!"); 296 } 297 } 298 299 // Generate the compare in the exiting block. 300 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass); 301 auto CMPMIB = 302 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef) 303 .addReg(ADDIDef) 304 .addImm(0); 305 306 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY), 307 Dec->getOperand(0).getReg()) 308 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt); 309 310 // Remove the pseudo instructions. 311 Start->eraseFromParent(); 312 Dec->eraseFromParent(); 313 } 314 315 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start, 316 MachineInstr *Dec) { 317 bool Is64Bit = 318 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 319 320 MachineBasicBlock *Preheader = Start->getParent(); 321 MachineBasicBlock *Exiting = Dec->getParent(); 322 323 (void)Preheader; 324 assert((Preheader && Exiting) && 325 "Preheader and exiting should exist for CTR loop!"); 326 327 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!"); 328 329 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ; 330 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ; 331 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg()); 332 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) && 333 "There should be only one user for loop decrement pseudo!"); 334 335 unsigned Opcode = 0; 336 switch (BrInstr->getOpcode()) { 337 case PPC::BC: 338 Opcode = BDNZOpcode; 339 (void) ML; 340 assert(ML->contains(BrInstr->getOperand(1).getMBB()) && 341 "Invalid ctr loop!"); 342 break; 343 case PPC::BCn: 344 Opcode = BDZOpcode; 345 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) && 346 "Invalid ctr loop!"); 347 break; 348 default: 349 llvm_unreachable("Unhandled branch user for DecreaseCTRloop."); 350 } 351 352 // Generate "bdnz/bdz" in the exiting block just before the terminator. 353 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode)) 354 .addMBB(BrInstr->getOperand(1).getMBB()); 355 356 // Remove the pseudo instructions. 357 BrInstr->eraseFromParent(); 358 Dec->eraseFromParent(); 359 } 360