1 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // A pre-emit peephole for catching opportunities introduced by late passes such 10 // as MachineBlockPlacement. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPC.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCSubtarget.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/CodeGen/LivePhysRegs.h" 20 #include "llvm/CodeGen/MachineBasicBlock.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Debug.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "ppc-pre-emit-peephole" 31 32 STATISTIC(NumRRConvertedInPreEmit, 33 "Number of r+r instructions converted to r+i in pre-emit peephole"); 34 STATISTIC(NumRemovedInPreEmit, 35 "Number of instructions deleted in pre-emit peephole"); 36 STATISTIC(NumberOfSelfCopies, 37 "Number of self copy instructions eliminated"); 38 STATISTIC(NumFrameOffFoldInPreEmit, 39 "Number of folding frame offset by using r+r in pre-emit peephole"); 40 41 static cl::opt<bool> 42 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), 43 cl::desc("enable PC Relative linker optimization")); 44 45 static cl::opt<bool> 46 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), 47 cl::desc("Run pre-emit peephole optimizations.")); 48 49 namespace { 50 51 static bool hasPCRelativeForm(MachineInstr &Use) { 52 switch (Use.getOpcode()) { 53 default: 54 return false; 55 case PPC::LBZ: 56 case PPC::LBZ8: 57 case PPC::LHA: 58 case PPC::LHA8: 59 case PPC::LHZ: 60 case PPC::LHZ8: 61 case PPC::LWZ: 62 case PPC::LWZ8: 63 case PPC::STB: 64 case PPC::STB8: 65 case PPC::STH: 66 case PPC::STH8: 67 case PPC::STW: 68 case PPC::STW8: 69 case PPC::LD: 70 case PPC::STD: 71 case PPC::LWA: 72 case PPC::LXSD: 73 case PPC::LXSSP: 74 case PPC::LXV: 75 case PPC::STXSD: 76 case PPC::STXSSP: 77 case PPC::STXV: 78 case PPC::LFD: 79 case PPC::LFS: 80 case PPC::STFD: 81 case PPC::STFS: 82 case PPC::DFLOADf32: 83 case PPC::DFLOADf64: 84 case PPC::DFSTOREf32: 85 case PPC::DFSTOREf64: 86 return true; 87 } 88 } 89 90 class PPCPreEmitPeephole : public MachineFunctionPass { 91 public: 92 static char ID; 93 PPCPreEmitPeephole() : MachineFunctionPass(ID) { 94 initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry()); 95 } 96 97 void getAnalysisUsage(AnalysisUsage &AU) const override { 98 MachineFunctionPass::getAnalysisUsage(AU); 99 } 100 101 MachineFunctionProperties getRequiredProperties() const override { 102 return MachineFunctionProperties().set( 103 MachineFunctionProperties::Property::NoVRegs); 104 } 105 106 // This function removes any redundant load immediates. It has two level 107 // loops - The outer loop finds the load immediates BBI that could be used 108 // to replace following redundancy. The inner loop scans instructions that 109 // after BBI to find redundancy and update kill/dead flags accordingly. If 110 // AfterBBI is the same as BBI, it is redundant, otherwise any instructions 111 // that modify the def register of BBI would break the scanning. 112 // DeadOrKillToUnset is a pointer to the previous operand that had the 113 // kill/dead flag set. It keeps track of the def register of BBI, the use 114 // registers of AfterBBIs and the def registers of AfterBBIs. 115 bool removeRedundantLIs(MachineBasicBlock &MBB, 116 const TargetRegisterInfo *TRI) { 117 LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; 118 MBB.dump(); dbgs() << "\n"); 119 120 DenseSet<MachineInstr *> InstrsToErase; 121 for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { 122 // Skip load immediate that is marked to be erased later because it 123 // cannot be used to replace any other instructions. 124 if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) 125 continue; 126 // Skip non-load immediate. 127 unsigned Opc = BBI->getOpcode(); 128 if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && 129 Opc != PPC::LIS8) 130 continue; 131 // Skip load immediate, where the operand is a relocation (e.g., $r3 = 132 // LI target-flags(ppc-lo) %const.0). 133 if (!BBI->getOperand(1).isImm()) 134 continue; 135 assert(BBI->getOperand(0).isReg() && 136 "Expected a register for the first operand"); 137 138 LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); 139 140 Register Reg = BBI->getOperand(0).getReg(); 141 int64_t Imm = BBI->getOperand(1).getImm(); 142 MachineOperand *DeadOrKillToUnset = nullptr; 143 if (BBI->getOperand(0).isDead()) { 144 DeadOrKillToUnset = &BBI->getOperand(0); 145 LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset 146 << " from load immediate " << *BBI 147 << " is a unsetting candidate\n"); 148 } 149 // This loop scans instructions after BBI to see if there is any 150 // redundant load immediate. 151 for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); 152 ++AfterBBI) { 153 // Track the operand that kill Reg. We would unset the kill flag of 154 // the operand if there is a following redundant load immediate. 155 int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); 156 157 // We can't just clear implicit kills, so if we encounter one, stop 158 // looking further. 159 if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) { 160 LLVM_DEBUG(dbgs() 161 << "Encountered an implicit kill, cannot proceed: "); 162 LLVM_DEBUG(AfterBBI->dump()); 163 break; 164 } 165 166 if (KillIdx != -1) { 167 assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); 168 DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); 169 LLVM_DEBUG(dbgs() 170 << " Kill flag of " << *DeadOrKillToUnset << " from " 171 << *AfterBBI << " is a unsetting candidate\n"); 172 } 173 174 if (!AfterBBI->modifiesRegister(Reg, TRI)) 175 continue; 176 // Finish scanning because Reg is overwritten by a non-load 177 // instruction. 178 if (AfterBBI->getOpcode() != Opc) 179 break; 180 assert(AfterBBI->getOperand(0).isReg() && 181 "Expected a register for the first operand"); 182 // Finish scanning because Reg is overwritten by a relocation or a 183 // different value. 184 if (!AfterBBI->getOperand(1).isImm() || 185 AfterBBI->getOperand(1).getImm() != Imm) 186 break; 187 188 // It loads same immediate value to the same Reg, which is redundant. 189 // We would unset kill flag in previous Reg usage to extend live range 190 // of Reg first, then remove the redundancy. 191 if (DeadOrKillToUnset) { 192 LLVM_DEBUG(dbgs() 193 << " Unset dead/kill flag of " << *DeadOrKillToUnset 194 << " from " << *DeadOrKillToUnset->getParent()); 195 if (DeadOrKillToUnset->isDef()) 196 DeadOrKillToUnset->setIsDead(false); 197 else 198 DeadOrKillToUnset->setIsKill(false); 199 } 200 DeadOrKillToUnset = 201 AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); 202 if (DeadOrKillToUnset) 203 LLVM_DEBUG(dbgs() 204 << " Dead flag of " << *DeadOrKillToUnset << " from " 205 << *AfterBBI << " is a unsetting candidate\n"); 206 InstrsToErase.insert(&*AfterBBI); 207 LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; 208 AfterBBI->dump()); 209 } 210 } 211 212 for (MachineInstr *MI : InstrsToErase) { 213 MI->eraseFromParent(); 214 } 215 NumRemovedInPreEmit += InstrsToErase.size(); 216 return !InstrsToErase.empty(); 217 } 218 219 // Check if this instruction is a PLDpc that is part of a GOT indirect 220 // access. 221 bool isGOTPLDpc(MachineInstr &Instr) { 222 if (Instr.getOpcode() != PPC::PLDpc) 223 return false; 224 225 // The result must be a register. 226 const MachineOperand &LoadedAddressReg = Instr.getOperand(0); 227 if (!LoadedAddressReg.isReg()) 228 return false; 229 230 // Make sure that this is a global symbol. 231 const MachineOperand &SymbolOp = Instr.getOperand(1); 232 if (!SymbolOp.isGlobal()) 233 return false; 234 235 // Finally return true only if the GOT flag is present. 236 return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG); 237 } 238 239 bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { 240 MachineFunction *MF = MBB.getParent(); 241 // If the linker opt is disabled then just return. 242 if (!EnablePCRelLinkerOpt) 243 return false; 244 245 // Add this linker opt only if we are using PC Relative memops. 246 if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) 247 return false; 248 249 // Struct to keep track of one def/use pair for a GOT indirect access. 250 struct GOTDefUsePair { 251 MachineBasicBlock::iterator DefInst; 252 MachineBasicBlock::iterator UseInst; 253 Register DefReg; 254 Register UseReg; 255 bool StillValid; 256 }; 257 // Vector of def/ues pairs in this basic block. 258 SmallVector<GOTDefUsePair, 4> CandPairs; 259 SmallVector<GOTDefUsePair, 4> ValidPairs; 260 bool MadeChange = false; 261 262 // Run through all of the instructions in the basic block and try to 263 // collect potential pairs of GOT indirect access instructions. 264 for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { 265 // Look for the initial GOT indirect load. 266 if (isGOTPLDpc(*BBI)) { 267 GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(), 268 BBI->getOperand(0).getReg(), 269 PPC::NoRegister, true}; 270 CandPairs.push_back(CurrentPair); 271 continue; 272 } 273 274 // We haven't encountered any new PLD instructions, nothing to check. 275 if (CandPairs.empty()) 276 continue; 277 278 // Run through the candidate pairs and see if any of the registers 279 // defined in the PLD instructions are used by this instruction. 280 // Note: the size of CandPairs can change in the loop. 281 for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { 282 GOTDefUsePair &Pair = CandPairs[Idx]; 283 // The instruction does not use or modify this PLD's def reg, 284 // ignore it. 285 if (!BBI->readsRegister(Pair.DefReg, TRI) && 286 !BBI->modifiesRegister(Pair.DefReg, TRI)) 287 continue; 288 289 // The use needs to be used in the address compuation and not 290 // as the register being stored for a store. 291 const MachineOperand *UseOp = 292 hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr; 293 294 // Check for a valid use. 295 if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && 296 UseOp->isUse() && UseOp->isKill()) { 297 Pair.UseInst = BBI; 298 Pair.UseReg = BBI->getOperand(0).getReg(); 299 ValidPairs.push_back(Pair); 300 } 301 CandPairs.erase(CandPairs.begin() + Idx); 302 } 303 } 304 305 // Go through all of the pairs and check for any more valid uses. 306 for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { 307 // We shouldn't be here if we don't have a valid pair. 308 assert(Pair->UseInst.isValid() && Pair->StillValid && 309 "Kept an invalid def/use pair for GOT PCRel opt"); 310 // We have found a potential pair. Search through the instructions 311 // between the def and the use to see if it is valid to mark this as a 312 // linker opt. 313 MachineBasicBlock::iterator BBI = Pair->DefInst; 314 ++BBI; 315 for (; BBI != Pair->UseInst; ++BBI) { 316 if (BBI->readsRegister(Pair->UseReg, TRI) || 317 BBI->modifiesRegister(Pair->UseReg, TRI)) { 318 Pair->StillValid = false; 319 break; 320 } 321 } 322 323 if (!Pair->StillValid) 324 continue; 325 326 // The load/store instruction that uses the address from the PLD will 327 // either use a register (for a store) or define a register (for the 328 // load). That register will be added as an implicit def to the PLD 329 // and as an implicit use on the second memory op. This is a precaution 330 // to prevent future passes from using that register between the two 331 // instructions. 332 MachineOperand ImplDef = 333 MachineOperand::CreateReg(Pair->UseReg, true, true); 334 MachineOperand ImplUse = 335 MachineOperand::CreateReg(Pair->UseReg, false, true); 336 Pair->DefInst->addOperand(ImplDef); 337 Pair->UseInst->addOperand(ImplUse); 338 339 // Create the symbol. 340 MCContext &Context = MF->getContext(); 341 MCSymbol *Symbol = 342 Context.createTempSymbol(Twine("pcrel"), false, false); 343 MachineOperand PCRelLabel = 344 MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); 345 Pair->DefInst->addOperand(*MF, PCRelLabel); 346 Pair->UseInst->addOperand(*MF, PCRelLabel); 347 MadeChange |= true; 348 } 349 return MadeChange; 350 } 351 352 bool runOnMachineFunction(MachineFunction &MF) override { 353 if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { 354 // Remove UNENCODED_NOP even when this pass is disabled. 355 // This needs to be done unconditionally so we don't emit zeros 356 // in the instruction stream. 357 SmallVector<MachineInstr *, 4> InstrsToErase; 358 for (MachineBasicBlock &MBB : MF) 359 for (MachineInstr &MI : MBB) 360 if (MI.getOpcode() == PPC::UNENCODED_NOP) 361 InstrsToErase.push_back(&MI); 362 for (MachineInstr *MI : InstrsToErase) 363 MI->eraseFromParent(); 364 return false; 365 } 366 bool Changed = false; 367 const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); 368 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 369 SmallVector<MachineInstr *, 4> InstrsToErase; 370 for (MachineBasicBlock &MBB : MF) { 371 Changed |= removeRedundantLIs(MBB, TRI); 372 Changed |= addLinkerOpt(MBB, TRI); 373 for (MachineInstr &MI : MBB) { 374 unsigned Opc = MI.getOpcode(); 375 if (Opc == PPC::UNENCODED_NOP) { 376 InstrsToErase.push_back(&MI); 377 continue; 378 } 379 // Detect self copies - these can result from running AADB. 380 if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) { 381 const MCInstrDesc &MCID = TII->get(Opc); 382 if (MCID.getNumOperands() == 3 && 383 MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && 384 MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { 385 NumberOfSelfCopies++; 386 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); 387 LLVM_DEBUG(MI.dump()); 388 InstrsToErase.push_back(&MI); 389 continue; 390 } 391 else if (MCID.getNumOperands() == 2 && 392 MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { 393 NumberOfSelfCopies++; 394 LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); 395 LLVM_DEBUG(MI.dump()); 396 InstrsToErase.push_back(&MI); 397 continue; 398 } 399 } 400 MachineInstr *DefMIToErase = nullptr; 401 if (TII->convertToImmediateForm(MI, &DefMIToErase)) { 402 Changed = true; 403 NumRRConvertedInPreEmit++; 404 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: "); 405 LLVM_DEBUG(MI.dump()); 406 if (DefMIToErase) { 407 InstrsToErase.push_back(DefMIToErase); 408 } 409 } 410 if (TII->foldFrameOffset(MI)) { 411 Changed = true; 412 NumFrameOffFoldInPreEmit++; 413 LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); 414 LLVM_DEBUG(MI.dump()); 415 } 416 } 417 418 // Eliminate conditional branch based on a constant CR bit by 419 // CRSET or CRUNSET. We eliminate the conditional branch or 420 // convert it into an unconditional branch. Also, if the CR bit 421 // is not used by other instructions, we eliminate CRSET as well. 422 auto I = MBB.getFirstInstrTerminator(); 423 if (I == MBB.instr_end()) 424 continue; 425 MachineInstr *Br = &*I; 426 if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) 427 continue; 428 MachineInstr *CRSetMI = nullptr; 429 Register CRBit = Br->getOperand(0).getReg(); 430 unsigned CRReg = getCRFromCRBit(CRBit); 431 bool SeenUse = false; 432 MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); 433 for (It++; It != Er; It++) { 434 if (It->modifiesRegister(CRBit, TRI)) { 435 if ((It->getOpcode() == PPC::CRUNSET || 436 It->getOpcode() == PPC::CRSET) && 437 It->getOperand(0).getReg() == CRBit) 438 CRSetMI = &*It; 439 break; 440 } 441 if (It->readsRegister(CRBit, TRI)) 442 SeenUse = true; 443 } 444 if (!CRSetMI) continue; 445 446 unsigned CRSetOp = CRSetMI->getOpcode(); 447 if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || 448 (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { 449 // Remove this branch since it cannot be taken. 450 InstrsToErase.push_back(Br); 451 MBB.removeSuccessor(Br->getOperand(1).getMBB()); 452 } 453 else { 454 // This conditional branch is always taken. So, remove all branches 455 // and insert an unconditional branch to the destination of this. 456 MachineBasicBlock::iterator It = Br, Er = MBB.end(); 457 for (; It != Er; It++) { 458 if (It->isDebugInstr()) continue; 459 assert(It->isTerminator() && "Non-terminator after a terminator"); 460 InstrsToErase.push_back(&*It); 461 } 462 if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) { 463 ArrayRef<MachineOperand> NoCond; 464 TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr, 465 NoCond, Br->getDebugLoc()); 466 } 467 for (auto &Succ : MBB.successors()) 468 if (Succ != Br->getOperand(1).getMBB()) { 469 MBB.removeSuccessor(Succ); 470 break; 471 } 472 } 473 474 // If the CRBit is not used by another instruction, we can eliminate 475 // CRSET/CRUNSET instruction. 476 if (!SeenUse) { 477 // We need to check use of the CRBit in successors. 478 for (auto &SuccMBB : MBB.successors()) 479 if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) { 480 SeenUse = true; 481 break; 482 } 483 if (!SeenUse) 484 InstrsToErase.push_back(CRSetMI); 485 } 486 } 487 for (MachineInstr *MI : InstrsToErase) { 488 LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: "); 489 LLVM_DEBUG(MI->dump()); 490 MI->eraseFromParent(); 491 NumRemovedInPreEmit++; 492 } 493 return Changed; 494 } 495 }; 496 } 497 498 INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole", 499 false, false) 500 char PPCPreEmitPeephole::ID = 0; 501 502 FunctionPass *llvm::createPPCPreEmitPeepholePass() { 503 return new PPCPreEmitPeephole(); 504 } 505