//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer. It runs at the end of
// the SSA phases, following VSX swap removal. A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here. Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects: it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

// Pass-wide statistics, reported with -stats.
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");
STATISTIC(NumRotatesCollapsed,
          "Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");

// When set, the reg-reg -> reg-imm conversion is repeated until no further
// instruction is converted (each conversion may expose new candidates).
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

// The sign/zero extension elimination peepholes are off by default.
static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);

namespace {

struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Analyses cached by initialize() for use by the peepholes.
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *MPDT;
  MachineBlockFrequencyInfo *MBFI;
  // Block frequency of the function's entry block; compared against the
  // frequency of blocks containing TOC saves in UpdateTOCSaves().
  uint64_t EntryFreq;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Helpers invoked from simplifyCode() for specific peephole patterns.
  bool eliminateRedundantCompare(void);
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
  bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                      MachineInstr *MI);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addRequired<MachineBlockFrequencyInfo>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addPreserved<MachinePostDominatorTree>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Note: initialize() runs before the skipFunction() check so the cached
    // analyses are set up; the assertion below reads only MF directly.
    initialize(MF);
    // At this point, TOC pointer should not be used in a function that uses
    // PC-Relative addressing.
    assert((MF.getRegInfo().use_empty(PPC::X2) ||
            !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
           "TOC pointer used in a function using PC-Relative addressing!");
    if (skipFunction(MF.getFunction()))
      return false;
    return simplifyCode();
  }
};

// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  MPDT = &getAnalysis<MachinePostDominatorTree>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  // Cache the entry-block frequency; UpdateTOCSaves() compares other blocks
  // against it to decide whether to save the TOC in the prologue instead.
  EntryFreq = MBFI->getEntryFreq();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  LLVM_DEBUG(MF->dump());
}

// Return the MachineInstr defining the virtual register held by Op, or
// nullptr if Op is not a register operand or not a virtual register.
static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
                                      MachineRegisterInfo *MRI) {
  assert(Op && "Invalid Operand!");
  if (!Op->isReg())
    return nullptr;

  Register Reg = Op->getReg();
  if (!Register::isVirtualRegister(Reg))
    return nullptr;

  return MRI->getVRegDef(Reg);
}

// This function returns number of known zero bits in output of MI
// starting from the most significant bit. Returns 0 when nothing is known.
// The count is conservative: the output may have more leading zeros than
// reported here.
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
  unsigned Opcode = MI->getOpcode();
  // rldicl-family: operand 3 is the mask-begin (MB); the mask clears the
  // high MB bits, so at least MB leading zeros are guaranteed.
  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
      Opcode == PPC::RLDCL  || Opcode == PPC::RLDCL_rec)
    return MI->getOperand(3).getImm();

  // rldic: the mask runs from bit MB to bit 63-SH; only when MB <= 63-SH
  // does the mask actually clear the high MB bits.
  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
       MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return MI->getOperand(3).getImm();

  // rlwinm-family (32-bit rotate-and-mask): with a non-wrapping mask
  // (MB <= ME), the 64-bit result has the upper 32 bits zero plus MB more
  // zeros within the low word.
  if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM   || Opcode == PPC::RLWNM_rec  ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
       MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return 32 + MI->getOperand(3).getImm();

  // andi.: the result fits in 16 bits, so 48 leading zeros are guaranteed,
  // plus however many leading zeros the 16-bit immediate itself has.
  if (Opcode == PPC::ANDI_rec) {
    uint16_t Imm = MI->getOperand(2).getImm();
    return 48 + countLeadingZeros(Imm);
  }

  if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZW_rec ||
      Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZW_rec ||
      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
    // The result ranges from 0 to 32.
    return 58;

  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
    // The result ranges from 0 to 64.
    return 57;

  // Zero-extending halfword loads: result fits in 16 bits.
  if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
      Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
      Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
    return 48;

  // Zero-extending byte loads: result fits in 8 bits.
  if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
      Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
      Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
    return 56;

  // Generic fallback: anything TII knows to be zero-extended from 32 bits.
  if (TII->isZeroExtended(*MI))
    return 32;

  return 0;
}

// This function maintains a map for the pairs <TOC Save Instr, Keep>
// Each time a new TOC save is encountered, it checks if any of the existing
// ones are dominated by the new one. If so, it marks the existing one as
// redundant by setting it's entry in the map as false. It then adds the new
// instruction to the map with either true or false depending on if any
// existing instructions dominated the new one.
void PPCMIPeephole::UpdateTOCSaves(
  std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
  assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
         "TOC-save removal only supported on ELFv2");
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();

  MachineBasicBlock *Entry = &MF->front();
  uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();

  // If the block in which the TOC save resides is in a block that
  // post-dominates Entry, or a block that is hotter than entry (keep in mind
  // that early MachineLICM has already run so the TOC save won't be hoisted)
  // we can just do the save in the prologue.
238 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry)) 239 FI->setMustSaveTOC(true); 240 241 // If we are saving the TOC in the prologue, all the TOC saves can be removed 242 // from the code. 243 if (FI->mustSaveTOC()) { 244 for (auto &TOCSave : TOCSaves) 245 TOCSave.second = false; 246 // Add new instruction to map. 247 TOCSaves[MI] = false; 248 return; 249 } 250 251 bool Keep = true; 252 for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) { 253 MachineInstr *CurrInst = It->first; 254 // If new instruction dominates an existing one, mark existing one as 255 // redundant. 256 if (It->second && MDT->dominates(MI, CurrInst)) 257 It->second = false; 258 // Check if the new instruction is redundant. 259 if (MDT->dominates(CurrInst, MI)) { 260 Keep = false; 261 break; 262 } 263 } 264 // Add new instruction to map. 265 TOCSaves[MI] = Keep; 266 } 267 268 // Perform peephole optimizations. 269 bool PPCMIPeephole::simplifyCode(void) { 270 bool Simplified = false; 271 MachineInstr* ToErase = nullptr; 272 std::map<MachineInstr *, bool> TOCSaves; 273 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 274 NumFunctionsEnteredInMIPeephole++; 275 if (ConvertRegReg) { 276 // Fixed-point conversion of reg/reg instructions fed by load-immediate 277 // into reg/imm instructions. FIXME: This is expensive, control it with 278 // an option. 279 bool SomethingChanged = false; 280 do { 281 NumFixedPointIterations++; 282 SomethingChanged = false; 283 for (MachineBasicBlock &MBB : *MF) { 284 for (MachineInstr &MI : MBB) { 285 if (MI.isDebugInstr()) 286 continue; 287 288 if (TII->convertToImmediateForm(MI)) { 289 // We don't erase anything in case the def has other uses. Let DCE 290 // remove it if it can be removed. 
291 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: "); 292 LLVM_DEBUG(MI.dump()); 293 NumConvertedToImmediateForm++; 294 SomethingChanged = true; 295 Simplified = true; 296 continue; 297 } 298 } 299 } 300 } while (SomethingChanged && FixedPointRegToImm); 301 } 302 303 for (MachineBasicBlock &MBB : *MF) { 304 for (MachineInstr &MI : MBB) { 305 306 // If the previous instruction was marked for elimination, 307 // remove it now. 308 if (ToErase) { 309 ToErase->eraseFromParent(); 310 ToErase = nullptr; 311 } 312 313 // Ignore debug instructions. 314 if (MI.isDebugInstr()) 315 continue; 316 317 // Per-opcode peepholes. 318 switch (MI.getOpcode()) { 319 320 default: 321 break; 322 323 case PPC::STD: { 324 MachineFrameInfo &MFI = MF->getFrameInfo(); 325 if (MFI.hasVarSizedObjects() || 326 !MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) 327 break; 328 // When encountering a TOC save instruction, call UpdateTOCSaves 329 // to add it to the TOCSaves map and mark any existing TOC saves 330 // it dominates as redundant. 331 if (TII->isTOCSaveMI(MI)) 332 UpdateTOCSaves(TOCSaves, &MI); 333 break; 334 } 335 case PPC::XXPERMDI: { 336 // Perform simplifications of 2x64 vector swaps and splats. 337 // A swap is identified by an immediate value of 2, and a splat 338 // is identified by an immediate value of 0 or 3. 339 int Immed = MI.getOperand(3).getImm(); 340 341 if (Immed == 1) 342 break; 343 344 // For each of these simplifications, we need the two source 345 // regs to match. Unfortunately, MachineCSE ignores COPY and 346 // SUBREG_TO_REG, so for example we can see 347 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. 348 // We have to look through chains of COPY and SUBREG_TO_REG 349 // to find the real source values for comparison. 
350 unsigned TrueReg1 = 351 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); 352 unsigned TrueReg2 = 353 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); 354 355 if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1))) 356 break; 357 358 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); 359 360 if (!DefMI) 361 break; 362 363 unsigned DefOpc = DefMI->getOpcode(); 364 365 // If this is a splat fed by a splatting load, the splat is 366 // redundant. Replace with a copy. This doesn't happen directly due 367 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting 368 // a load of a double to a vector of 64-bit integers. 369 auto isConversionOfLoadAndSplat = [=]() -> bool { 370 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS) 371 return false; 372 unsigned FeedReg1 = 373 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); 374 if (Register::isVirtualRegister(FeedReg1)) { 375 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1); 376 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) 377 return true; 378 } 379 return false; 380 }; 381 if ((Immed == 0 || Immed == 3) && 382 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) { 383 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat " 384 "to load-and-splat/copy: "); 385 LLVM_DEBUG(MI.dump()); 386 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 387 MI.getOperand(0).getReg()) 388 .add(MI.getOperand(1)); 389 ToErase = &MI; 390 Simplified = true; 391 } 392 393 // If this is a splat or a swap fed by another splat, we 394 // can replace it with a copy. 395 if (DefOpc == PPC::XXPERMDI) { 396 unsigned DefReg1 = DefMI->getOperand(1).getReg(); 397 unsigned DefReg2 = DefMI->getOperand(2).getReg(); 398 unsigned DefImmed = DefMI->getOperand(3).getImm(); 399 400 // If the two inputs are not the same register, check to see if 401 // they originate from the same virtual register after only 402 // copy-like instructions. 
403 if (DefReg1 != DefReg2) { 404 unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI); 405 unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI); 406 407 if (!(FeedReg1 == FeedReg2 && 408 Register::isVirtualRegister(FeedReg1))) 409 break; 410 } 411 412 if (DefImmed == 0 || DefImmed == 3) { 413 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " 414 "to splat/copy: "); 415 LLVM_DEBUG(MI.dump()); 416 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 417 MI.getOperand(0).getReg()) 418 .add(MI.getOperand(1)); 419 ToErase = &MI; 420 Simplified = true; 421 } 422 423 // If this is a splat fed by a swap, we can simplify modify 424 // the splat to splat the other value from the swap's input 425 // parameter. 426 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) { 427 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: "); 428 LLVM_DEBUG(MI.dump()); 429 MI.getOperand(1).setReg(DefReg1); 430 MI.getOperand(2).setReg(DefReg2); 431 MI.getOperand(3).setImm(3 - Immed); 432 Simplified = true; 433 } 434 435 // If this is a swap fed by a swap, we can replace it 436 // with a copy from the first swap's input. 437 else if (Immed == 2 && DefImmed == 2) { 438 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: "); 439 LLVM_DEBUG(MI.dump()); 440 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 441 MI.getOperand(0).getReg()) 442 .add(DefMI->getOperand(1)); 443 ToErase = &MI; 444 Simplified = true; 445 } 446 } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs && 447 (DefMI->getOperand(2).getImm() == 0 || 448 DefMI->getOperand(2).getImm() == 3)) { 449 // Splat fed by another splat - switch the output of the first 450 // and remove the second. 
451 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); 452 ToErase = &MI; 453 Simplified = true; 454 LLVM_DEBUG(dbgs() << "Removing redundant splat: "); 455 LLVM_DEBUG(MI.dump()); 456 } 457 break; 458 } 459 case PPC::VSPLTB: 460 case PPC::VSPLTH: 461 case PPC::XXSPLTW: { 462 unsigned MyOpcode = MI.getOpcode(); 463 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2; 464 unsigned TrueReg = 465 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI); 466 if (!Register::isVirtualRegister(TrueReg)) 467 break; 468 MachineInstr *DefMI = MRI->getVRegDef(TrueReg); 469 if (!DefMI) 470 break; 471 unsigned DefOpcode = DefMI->getOpcode(); 472 auto isConvertOfSplat = [=]() -> bool { 473 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS) 474 return false; 475 Register ConvReg = DefMI->getOperand(1).getReg(); 476 if (!Register::isVirtualRegister(ConvReg)) 477 return false; 478 MachineInstr *Splt = MRI->getVRegDef(ConvReg); 479 return Splt && (Splt->getOpcode() == PPC::LXVWSX || 480 Splt->getOpcode() == PPC::XXSPLTW); 481 }; 482 bool AlreadySplat = (MyOpcode == DefOpcode) || 483 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) || 484 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) || 485 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) || 486 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) || 487 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)|| 488 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat()); 489 // If the instruction[s] that feed this splat have already splat 490 // the value, this splat is redundant. 491 if (AlreadySplat) { 492 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: "); 493 LLVM_DEBUG(MI.dump()); 494 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 495 MI.getOperand(0).getReg()) 496 .add(MI.getOperand(OpNo)); 497 ToErase = &MI; 498 Simplified = true; 499 } 500 // Splat fed by a shift. Usually when we align value to splat into 501 // vector element zero. 
502 if (DefOpcode == PPC::XXSLDWI) { 503 Register ShiftRes = DefMI->getOperand(0).getReg(); 504 Register ShiftOp1 = DefMI->getOperand(1).getReg(); 505 Register ShiftOp2 = DefMI->getOperand(2).getReg(); 506 unsigned ShiftImm = DefMI->getOperand(3).getImm(); 507 unsigned SplatImm = MI.getOperand(2).getImm(); 508 if (ShiftOp1 == ShiftOp2) { 509 unsigned NewElem = (SplatImm + ShiftImm) & 0x3; 510 if (MRI->hasOneNonDBGUse(ShiftRes)) { 511 LLVM_DEBUG(dbgs() << "Removing redundant shift: "); 512 LLVM_DEBUG(DefMI->dump()); 513 ToErase = DefMI; 514 } 515 Simplified = true; 516 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm 517 << " to " << NewElem << " in instruction: "); 518 LLVM_DEBUG(MI.dump()); 519 MI.getOperand(1).setReg(ShiftOp1); 520 MI.getOperand(2).setImm(NewElem); 521 } 522 } 523 break; 524 } 525 case PPC::XVCVDPSP: { 526 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant. 527 unsigned TrueReg = 528 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); 529 if (!Register::isVirtualRegister(TrueReg)) 530 break; 531 MachineInstr *DefMI = MRI->getVRegDef(TrueReg); 532 533 // This can occur when building a vector of single precision or integer 534 // values. 535 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) { 536 unsigned DefsReg1 = 537 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); 538 unsigned DefsReg2 = 539 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); 540 if (!Register::isVirtualRegister(DefsReg1) || 541 !Register::isVirtualRegister(DefsReg2)) 542 break; 543 MachineInstr *P1 = MRI->getVRegDef(DefsReg1); 544 MachineInstr *P2 = MRI->getVRegDef(DefsReg2); 545 546 if (!P1 || !P2) 547 break; 548 549 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI 550 // and set any uses of that FRSP/XSRSP (in this MI) to the source of 551 // the FRSP/XSRSP. 
552 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) { 553 unsigned Opc = RoundInstr->getOpcode(); 554 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) && 555 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) { 556 Simplified = true; 557 Register ConvReg1 = RoundInstr->getOperand(1).getReg(); 558 Register FRSPDefines = RoundInstr->getOperand(0).getReg(); 559 MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines)); 560 for (int i = 0, e = Use.getNumOperands(); i < e; ++i) 561 if (Use.getOperand(i).isReg() && 562 Use.getOperand(i).getReg() == FRSPDefines) 563 Use.getOperand(i).setReg(ConvReg1); 564 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n"); 565 LLVM_DEBUG(RoundInstr->dump()); 566 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n"); 567 LLVM_DEBUG(MI.dump()); 568 LLVM_DEBUG(dbgs() << "Through instruction:\n"); 569 LLVM_DEBUG(DefMI->dump()); 570 RoundInstr->eraseFromParent(); 571 } 572 }; 573 574 // If the input to XVCVDPSP is a vector that was built (even 575 // partially) out of FRSP's, the FRSP(s) can safely be removed 576 // since this instruction performs the same operation. 577 if (P1 != P2) { 578 removeFRSPIfPossible(P1); 579 removeFRSPIfPossible(P2); 580 break; 581 } 582 removeFRSPIfPossible(P1); 583 } 584 break; 585 } 586 case PPC::EXTSH: 587 case PPC::EXTSH8: 588 case PPC::EXTSH8_32_64: { 589 if (!EnableSExtElimination) break; 590 Register NarrowReg = MI.getOperand(1).getReg(); 591 if (!Register::isVirtualRegister(NarrowReg)) 592 break; 593 594 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); 595 // If we've used a zero-extending load that we will sign-extend, 596 // just do a sign-extending load. 
597 if (SrcMI->getOpcode() == PPC::LHZ || 598 SrcMI->getOpcode() == PPC::LHZX) { 599 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg())) 600 break; 601 auto is64Bit = [] (unsigned Opcode) { 602 return Opcode == PPC::EXTSH8; 603 }; 604 auto isXForm = [] (unsigned Opcode) { 605 return Opcode == PPC::LHZX; 606 }; 607 auto getSextLoadOp = [] (bool is64Bit, bool isXForm) { 608 if (is64Bit) 609 if (isXForm) return PPC::LHAX8; 610 else return PPC::LHA8; 611 else 612 if (isXForm) return PPC::LHAX; 613 else return PPC::LHA; 614 }; 615 unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()), 616 isXForm(SrcMI->getOpcode())); 617 LLVM_DEBUG(dbgs() << "Zero-extending load\n"); 618 LLVM_DEBUG(SrcMI->dump()); 619 LLVM_DEBUG(dbgs() << "and sign-extension\n"); 620 LLVM_DEBUG(MI.dump()); 621 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n"); 622 SrcMI->setDesc(TII->get(Opc)); 623 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg()); 624 ToErase = &MI; 625 Simplified = true; 626 NumEliminatedSExt++; 627 } 628 break; 629 } 630 case PPC::EXTSW: 631 case PPC::EXTSW_32: 632 case PPC::EXTSW_32_64: { 633 if (!EnableSExtElimination) break; 634 Register NarrowReg = MI.getOperand(1).getReg(); 635 if (!Register::isVirtualRegister(NarrowReg)) 636 break; 637 638 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); 639 // If we've used a zero-extending load that we will sign-extend, 640 // just do a sign-extending load. 
641 if (SrcMI->getOpcode() == PPC::LWZ || 642 SrcMI->getOpcode() == PPC::LWZX) { 643 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg())) 644 break; 645 auto is64Bit = [] (unsigned Opcode) { 646 return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64; 647 }; 648 auto isXForm = [] (unsigned Opcode) { 649 return Opcode == PPC::LWZX; 650 }; 651 auto getSextLoadOp = [] (bool is64Bit, bool isXForm) { 652 if (is64Bit) 653 if (isXForm) return PPC::LWAX; 654 else return PPC::LWA; 655 else 656 if (isXForm) return PPC::LWAX_32; 657 else return PPC::LWA_32; 658 }; 659 unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()), 660 isXForm(SrcMI->getOpcode())); 661 LLVM_DEBUG(dbgs() << "Zero-extending load\n"); 662 LLVM_DEBUG(SrcMI->dump()); 663 LLVM_DEBUG(dbgs() << "and sign-extension\n"); 664 LLVM_DEBUG(MI.dump()); 665 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n"); 666 SrcMI->setDesc(TII->get(Opc)); 667 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg()); 668 ToErase = &MI; 669 Simplified = true; 670 NumEliminatedSExt++; 671 } else if (MI.getOpcode() == PPC::EXTSW_32_64 && 672 TII->isSignExtended(*SrcMI)) { 673 // We can eliminate EXTSW if the input is known to be already 674 // sign-extended. 675 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); 676 Register TmpReg = 677 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); 678 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), 679 TmpReg); 680 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG), 681 MI.getOperand(0).getReg()) 682 .addReg(TmpReg) 683 .addReg(NarrowReg) 684 .addImm(PPC::sub_32); 685 ToErase = &MI; 686 Simplified = true; 687 NumEliminatedSExt++; 688 } 689 break; 690 } 691 case PPC::RLDICL: { 692 // We can eliminate RLDICL (e.g. for zero-extension) 693 // if all bits to clear are already zero in the input. 694 // This code assume following code sequence for zero-extension. 
695 // %6 = COPY %5:sub_32; (optional) 696 // %8 = IMPLICIT_DEF; 697 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32; 698 if (!EnableZExtElimination) break; 699 700 if (MI.getOperand(2).getImm() != 0) 701 break; 702 703 Register SrcReg = MI.getOperand(1).getReg(); 704 if (!Register::isVirtualRegister(SrcReg)) 705 break; 706 707 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); 708 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG && 709 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg())) 710 break; 711 712 MachineInstr *ImpDefMI, *SubRegMI; 713 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); 714 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg()); 715 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break; 716 717 SrcMI = SubRegMI; 718 if (SubRegMI->getOpcode() == PPC::COPY) { 719 Register CopyReg = SubRegMI->getOperand(1).getReg(); 720 if (Register::isVirtualRegister(CopyReg)) 721 SrcMI = MRI->getVRegDef(CopyReg); 722 } 723 724 unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII); 725 if (MI.getOperand(3).getImm() <= KnownZeroCount) { 726 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n"); 727 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 728 MI.getOperand(0).getReg()) 729 .addReg(SrcReg); 730 ToErase = &MI; 731 Simplified = true; 732 NumEliminatedZExt++; 733 } 734 break; 735 } 736 737 // TODO: Any instruction that has an immediate form fed only by a PHI 738 // whose operands are all load immediate can be folded away. We currently 739 // do this for ADD instructions, but should expand it to arithmetic and 740 // binary instructions with immediate forms in the future. 
741 case PPC::ADD4: 742 case PPC::ADD8: { 743 auto isSingleUsePHI = [&](MachineOperand *PhiOp) { 744 assert(PhiOp && "Invalid Operand!"); 745 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI); 746 747 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) && 748 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg()); 749 }; 750 751 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp, 752 MachineOperand *PhiOp) { 753 assert(PhiOp && "Invalid Operand!"); 754 assert(DominatorOp && "Invalid Operand!"); 755 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI); 756 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI); 757 758 // Note: the vregs only show up at odd indices position of PHI Node, 759 // the even indices position save the BB info. 760 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) { 761 MachineInstr *LiMI = 762 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI); 763 if (!LiMI || 764 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8) 765 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) || 766 !MDT->dominates(DefDomMI, LiMI)) 767 return false; 768 } 769 770 return true; 771 }; 772 773 MachineOperand Op1 = MI.getOperand(1); 774 MachineOperand Op2 = MI.getOperand(2); 775 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2)) 776 std::swap(Op1, Op2); 777 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1)) 778 break; // We don't have an ADD fed by LI's that can be transformed 779 780 // Now we know that Op1 is the PHI node and Op2 is the dominator 781 Register DominatorReg = Op2.getReg(); 782 783 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8 784 ? 
&PPC::G8RC_and_G8RC_NOX0RegClass 785 : &PPC::GPRC_and_GPRC_NOR0RegClass; 786 MRI->setRegClass(DominatorReg, TRC); 787 788 // replace LIs with ADDIs 789 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI); 790 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) { 791 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI); 792 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: "); 793 LLVM_DEBUG(LiMI->dump()); 794 795 // There could be repeated registers in the PHI, e.g: %1 = 796 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've 797 // already replaced the def instruction, skip. 798 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8) 799 continue; 800 801 assert((LiMI->getOpcode() == PPC::LI || 802 LiMI->getOpcode() == PPC::LI8) && 803 "Invalid Opcode!"); 804 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI 805 LiMI->RemoveOperand(1); // remove the imm of LI 806 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI 807 : PPC::ADDI8)); 808 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI) 809 .addReg(DominatorReg) 810 .addImm(LiImm); // restore the imm of LI 811 LLVM_DEBUG(LiMI->dump()); 812 } 813 814 // Replace ADD with COPY 815 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: "); 816 LLVM_DEBUG(MI.dump()); 817 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 818 MI.getOperand(0).getReg()) 819 .add(Op1); 820 ToErase = &MI; 821 Simplified = true; 822 NumOptADDLIs++; 823 break; 824 } 825 case PPC::RLDICR: { 826 Simplified |= emitRLDICWhenLoweringJumpTables(MI) || 827 combineSEXTAndSHL(MI, ToErase); 828 break; 829 } 830 case PPC::RLWINM: 831 case PPC::RLWINM_rec: 832 case PPC::RLWINM8: 833 case PPC::RLWINM8_rec: { 834 unsigned FoldingReg = MI.getOperand(1).getReg(); 835 if (!Register::isVirtualRegister(FoldingReg)) 836 break; 837 838 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); 839 if (SrcMI->getOpcode() != PPC::RLWINM && 840 SrcMI->getOpcode() != PPC::RLWINM_rec && 841 
SrcMI->getOpcode() != PPC::RLWINM8 && 842 SrcMI->getOpcode() != PPC::RLWINM8_rec) 843 break; 844 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && 845 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && 846 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && 847 "Invalid PPC::RLWINM Instruction!"); 848 uint64_t SHSrc = SrcMI->getOperand(2).getImm(); 849 uint64_t SHMI = MI.getOperand(2).getImm(); 850 uint64_t MBSrc = SrcMI->getOperand(3).getImm(); 851 uint64_t MBMI = MI.getOperand(3).getImm(); 852 uint64_t MESrc = SrcMI->getOperand(4).getImm(); 853 uint64_t MEMI = MI.getOperand(4).getImm(); 854 855 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) && 856 "Invalid PPC::RLWINM Instruction!"); 857 858 // If MBMI is bigger than MEMI, we always can not get run of ones. 859 // RotatedSrcMask non-wrap: 860 // 0........31|32........63 861 // RotatedSrcMask: B---E B---E 862 // MaskMI: -----------|--E B------ 863 // Result: ----- --- (Bad candidate) 864 // 865 // RotatedSrcMask wrap: 866 // 0........31|32........63 867 // RotatedSrcMask: --E B----|--E B---- 868 // MaskMI: -----------|--E B------ 869 // Result: --- -----|--- ----- (Bad candidate) 870 // 871 // One special case is RotatedSrcMask is a full set mask. 872 // RotatedSrcMask full: 873 // 0........31|32........63 874 // RotatedSrcMask: ------EB---|-------EB--- 875 // MaskMI: -----------|--E B------ 876 // Result: -----------|--- ------- (Good candidate) 877 878 // Mark special case. 879 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31); 880 881 // For other MBMI > MEMI cases, just return. 882 if ((MBMI > MEMI) && !SrcMaskFull) 883 break; 884 885 // Handle MBMI <= MEMI cases. 886 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI); 887 // In MI, we only need low 32 bits of SrcMI, just consider about low 32 888 // bit of SrcMI mask. 
Note that in APInt, lowerest bit is at index 0, 889 // while in PowerPC ISA, lowerest bit is at index 63. 890 APInt MaskSrc = 891 APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc); 892 // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is 893 // equal to highBit. 894 // If MBSrc - MESrc == 1, we expect a full set mask instead of Null. 895 if (SrcMaskFull && (MBSrc - MESrc == 1)) 896 MaskSrc.setAllBits(); 897 898 APInt RotatedSrcMask = MaskSrc.rotl(SHMI); 899 APInt FinalMask = RotatedSrcMask & MaskMI; 900 uint32_t NewMB, NewME; 901 902 // If final mask is 0, MI result should be 0 too. 903 if (FinalMask.isNullValue()) { 904 bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 || 905 MI.getOpcode() == PPC::RLWINM8_rec); 906 907 Simplified = true; 908 909 LLVM_DEBUG(dbgs() << "Replace Instr: "); 910 LLVM_DEBUG(MI.dump()); 911 912 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) { 913 // Replace MI with "LI 0" 914 MI.RemoveOperand(4); 915 MI.RemoveOperand(3); 916 MI.RemoveOperand(2); 917 MI.getOperand(1).ChangeToImmediate(0); 918 MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI)); 919 } else { 920 // Replace MI with "ANDI_rec reg, 0" 921 MI.RemoveOperand(4); 922 MI.RemoveOperand(3); 923 MI.getOperand(2).setImm(0); 924 MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); 925 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); 926 if (SrcMI->getOperand(1).isKill()) { 927 MI.getOperand(1).setIsKill(true); 928 SrcMI->getOperand(1).setIsKill(false); 929 } else 930 // About to replace MI.getOperand(1), clear its kill flag. 931 MI.getOperand(1).setIsKill(false); 932 } 933 934 LLVM_DEBUG(dbgs() << "With: "); 935 LLVM_DEBUG(MI.dump()); 936 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, 937 NewME) && NewMB <= NewME)|| SrcMaskFull) { 938 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger 939 // than NewME. Otherwise we get a 64 bit value after folding, but MI 940 // return a 32 bit value. 
941 942 Simplified = true; 943 LLVM_DEBUG(dbgs() << "Converting Instr: "); 944 LLVM_DEBUG(MI.dump()); 945 946 uint16_t NewSH = (SHSrc + SHMI) % 32; 947 MI.getOperand(2).setImm(NewSH); 948 // If SrcMI mask is full, no need to update MBMI and MEMI. 949 if (!SrcMaskFull) { 950 MI.getOperand(3).setImm(NewMB); 951 MI.getOperand(4).setImm(NewME); 952 } 953 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); 954 if (SrcMI->getOperand(1).isKill()) { 955 MI.getOperand(1).setIsKill(true); 956 SrcMI->getOperand(1).setIsKill(false); 957 } else 958 // About to replace MI.getOperand(1), clear its kill flag. 959 MI.getOperand(1).setIsKill(false); 960 961 LLVM_DEBUG(dbgs() << "To: "); 962 LLVM_DEBUG(MI.dump()); 963 } 964 if (Simplified) { 965 // If FoldingReg has no non-debug use and it has no implicit def (it 966 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. 967 // Otherwise keep it. 968 ++NumRotatesCollapsed; 969 if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) { 970 ToErase = SrcMI; 971 LLVM_DEBUG(dbgs() << "Delete dead instruction: "); 972 LLVM_DEBUG(SrcMI->dump()); 973 } 974 } 975 break; 976 } 977 } 978 } 979 980 // If the last instruction was marked for elimination, 981 // remove it now. 982 if (ToErase) { 983 ToErase->eraseFromParent(); 984 ToErase = nullptr; 985 } 986 } 987 988 // Eliminate all the TOC save instructions which are redundant. 989 Simplified |= eliminateRedundantTOCSaves(TOCSaves); 990 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 991 if (FI->mustSaveTOC()) 992 NumTOCSavesInPrologue++; 993 994 // We try to eliminate redundant compare instruction. 
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}

// helper functions for eliminateRedundantCompare

// Returns true if the conditional branch BI tests only equality or
// inequality (EQ/NE), ignoring any branch-prediction hint bits folded
// into the predicate operand.
static bool isEqOrNe(MachineInstr *BI) {
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
}

// Returns true if opCode is one of the integer compares (signed or
// unsigned, 32- or 64-bit, register or immediate form) that the
// compare-merging optimization knows how to handle.
static bool isSupportedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
          opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
          opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
}

// Returns true if opCode is a 64-bit (doubleword) compare.
static bool is64bitCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
}

// Returns true if opCode is a signed compare (CMPD/CMPW families);
// the CMPL* variants are unsigned.
static bool isSignedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
          opCode == PPC::CMPDI || opCode == PPC::CMPWI);
}

// Maps an unsigned compare opcode to its signed counterpart; any other
// opcode (already signed, or unsupported) is returned unchanged.
static unsigned getSignedCmpOpCode(unsigned opCode) {
  if (opCode == PPC::CMPLD)  return PPC::CMPD;
  if (opCode == PPC::CMPLW)  return PPC::CMPW;
  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
  return opCode;
}

// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
// (LT x) to (LE x-1).
// Returns the adjusted predicate, or 0 if the immediate cannot be
// decremented (it is already at the minimum of its 16-bit field) or the
// branch predicate is not GE/LT.
static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  // Decrementing would underflow: 0 for unsigned compares, the most
  // negative 16-bit value for signed compares.
  // NOTE(review): the signed check assumes the immediate is seen in its
  // unsigned 16-bit encoding (0x8000 == -32768) — confirm against how the
  // compare's immediate operand is canonicalized.
  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GE)
    return PPC::getPredicate(PPC::PRED_GT, PredHint);
  if (PredCond == PPC::PRED_LT)
    return PPC::getPredicate(PPC::PRED_LE, PredHint);

  return 0;
}

// We can increment immediate x in (GT x) by changing it to (GE x+1) or
// (LE x) to (LT x+1).
// Returns the adjusted predicate, or 0 if the immediate cannot be
// incremented (it is already at the maximum of its 16-bit field) or the
// branch predicate is not GT/LE.
static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  // Incrementing would overflow: 0xFFFF for unsigned compares, 0x7FFF for
  // signed compares.
  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GT)
    return PPC::getPredicate(PPC::PRED_GE, PredHint);
  if (PredCond == PPC::PRED_LE)
    return PPC::getPredicate(PPC::PRED_LT, PredHint);

  return 0;
}

// This takes a Phi node and returns a register value for the specified BB.
static unsigned getIncomingRegForBlock(MachineInstr *Phi,
                                       MachineBasicBlock *MBB) {
  // PHI operands come as (reg, MBB) pairs starting at index 1, so the
  // basic-block operands sit at the even indices 2, 4, ...
  for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
    MachineOperand &MO = Phi->getOperand(I);
    if (MO.getMBB() == MBB)
      return Phi->getOperand(I-1).getReg();
  }
  llvm_unreachable("invalid src basic block for this Phi node\n");
  return 0;
}

// This function tracks the source of the register through register copy.
// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
// assuming that the control comes from BB1 into BB2.
// Walks def chains from Reg through full COPY instructions and, at most
// once, through a PHI located in BB2 (taking the value incoming from BB1).
// Returns the furthest virtual register reached; used to prove that two
// compare operands originate from the same value.
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
                           MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
  unsigned SrcReg = Reg;
  while (1) {
    unsigned NextReg = SrcReg;
    MachineInstr *Inst = MRI->getVRegDef(SrcReg);
    if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
      NextReg = getIncomingRegForBlock(Inst, BB1);
      // We track through PHI only once to avoid infinite loop.
      BB1 = nullptr;
    }
    else if (Inst->isFullCopy())
      NextReg = Inst->getOperand(1).getReg();
    // Stop when no further progress was made or we reached a physical
    // register (whose def chain we cannot follow through MRI).
    if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
      break;
    SrcReg = NextReg;
  }
  return SrcReg;
}

// Decides whether MBB is a candidate for compare elimination and, if so,
// reports via the out-parameters which predecessor holds the compare to
// merge with (PredMBB) and, for the partially redundant case, which block
// the compare must be hoisted into (MBBtoMoveCmp; nullptr when fully
// redundant). Returns false when the block shape does not match.
static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      Register CndReg = (*BII).getOperand(1).getReg();
      if (!Register::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !Register::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  // If this BB has more than one successor, we can create a new BB and
  // move the compare instruction in the new BB.
  // So far, we do not move compare instruction to a BB having multiple
  // successors to avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    // Fully redundant case: the single predecessor already ends with an
    // equivalent compare-and-branch.
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Hence we can proceed as is.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(Pred1MBB, Pred2MBB);
    }
    else return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}

// This function will iterate over the input map containing a pair of TOC save
// instruction and a flag. The flag will be set to false if the TOC save is
// proven redundant. This function will erase from the basic block all the TOC
// saves marked as redundant.
bool PPCMIPeephole::eliminateRedundantTOCSaves(
    std::map<MachineInstr *, bool> &TOCSaves) {
  bool Simplified = false;
  int NumKept = 0;
  for (auto TOCSave : TOCSaves) {
    if (!TOCSave.second) {
      // Flag is false: this save was proven redundant; delete it.
      TOCSave.first->eraseFromParent();
      RemoveTOCSave++;
      Simplified = true;
    } else {
      NumKept++;
    }
  }

  // Track functions where more than one TOC save had to survive.
  if (NumKept > 1)
    MultiTOCSaves++;

  return Simplified;
}

// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge compare instructions into one and make multiple
// conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
//  cmplwi r3, 0
//  beq    0, .LBB0_3
//  cmpwi  r3, -1
//  bgt    0, .LBB0_4
// So, before merging two compares, we need to modify these instructions as
//  cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
//  beq    0, .LBB0_3
//  cmpwi  r3, 0       ; greater than -1 means greater or equal to 0
//  bge    0, .LBB0_4
bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as an operand in both MBBs.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As partially redundant case, we additionally handle if MBB2 has one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //      |    \                     (w/ 1 succ) \     |    \
    //      |     \                                 \    |     \
    //      |                                        \   |
    //   --------                                     --------
    //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                         -------- and 2 succ)
    //      |    \                                       |    \
    //      |     \                                      |     \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.

      // We cannot change opcode when comparing against an immediate
      // if the most significant bit of the immediate is one
      // due to the difference in sign extension.
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
        if (!I->getOperand(2).isImm())
          return false;
        int16_t Imm = (int16_t)I->getOperand(2).getImm();
        return Imm < 0;
      };

      if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      // For CMPI2's operands, track through a PHI in MBB2 along the edge
      // from MBB1 so values that merely flow through the PHI still match.
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in another compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If immediate are not same, we try to adjust by changing predicate;
      // e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        // Each branch's predicate can absorb a difference of at most one,
        // so only gaps of up to two (one per branch) are bridgeable.
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(CMPI2->dump());
    LLVM_DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle partially redundant case.
      if (SwapOperands) {
        Register Op1 = CMPI2->getOperand(1).getReg();
        Register Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      // Any CMPI2 operand defined by a PHI in MBB2 must be rewritten to the
      // value incoming from MBBtoMoveCmp before the compare is hoisted.
      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      // Merge the two condition-register results with a PHI so BI2 sees the
      // correct value on either incoming path.
      DebugLoc DL = CMPI2->getDebugLoc();
      Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    // The condition register dies at the second branch now.
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
                        << printMBBReference(*MBBtoMoveCmp)
                        << " to handle partial redundancy.\n");
      LLVM_DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}

// We miss the opportunity to emit an RLDIC when lowering jump tables
// since ISEL sees only a single basic block. When selecting, the clear
// and shift left will be in different blocks.
1500 bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { 1501 if (MI.getOpcode() != PPC::RLDICR) 1502 return false; 1503 1504 Register SrcReg = MI.getOperand(1).getReg(); 1505 if (!Register::isVirtualRegister(SrcReg)) 1506 return false; 1507 1508 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); 1509 if (SrcMI->getOpcode() != PPC::RLDICL) 1510 return false; 1511 1512 MachineOperand MOpSHSrc = SrcMI->getOperand(2); 1513 MachineOperand MOpMBSrc = SrcMI->getOperand(3); 1514 MachineOperand MOpSHMI = MI.getOperand(2); 1515 MachineOperand MOpMEMI = MI.getOperand(3); 1516 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() && 1517 MOpMEMI.isImm())) 1518 return false; 1519 1520 uint64_t SHSrc = MOpSHSrc.getImm(); 1521 uint64_t MBSrc = MOpMBSrc.getImm(); 1522 uint64_t SHMI = MOpSHMI.getImm(); 1523 uint64_t MEMI = MOpMEMI.getImm(); 1524 uint64_t NewSH = SHSrc + SHMI; 1525 uint64_t NewMB = MBSrc - SHMI; 1526 if (NewMB > 63 || NewSH > 63) 1527 return false; 1528 1529 // The bits cleared with RLDICL are [0, MBSrc). 1530 // The bits cleared with RLDICR are (MEMI, 63]. 1531 // After the sequence, the bits cleared are: 1532 // [0, MBSrc-SHMI) and (MEMI, 63). 1533 // 1534 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63]. 1535 if ((63 - NewSH) != MEMI) 1536 return false; 1537 1538 LLVM_DEBUG(dbgs() << "Converting pair: "); 1539 LLVM_DEBUG(SrcMI->dump()); 1540 LLVM_DEBUG(MI.dump()); 1541 1542 MI.setDesc(TII->get(PPC::RLDIC)); 1543 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); 1544 MI.getOperand(2).setImm(NewSH); 1545 MI.getOperand(3).setImm(NewMB); 1546 1547 LLVM_DEBUG(dbgs() << "To: "); 1548 LLVM_DEBUG(MI.dump()); 1549 NumRotatesCollapsed++; 1550 return true; 1551 } 1552 1553 // For case in LLVM IR 1554 // entry: 1555 // %iconv = sext i32 %index to i64 1556 // br i1 undef label %true, label %false 1557 // true: 1558 // %ptr = getelementptr inbounds i32, i32* null, i64 %iconv 1559 // ... 
// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
// different BBs when conducting instruction selection. We can do a peephole
// optimization to combine these two instructions into extswsli after
// instruction selection.
//
// Combines an EXTSW (or EXTSW_32_64) feeding a left-shift RLDICR into one
// EXTSWSLI (or EXTSWSLI_32_64). On success, erases the EXTSW, marks MI for
// erasure via ToErase, and returns true.
bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
                                      MachineInstr *&ToErase) {
  if (MI.getOpcode() != PPC::RLDICR)
    return false;

  // EXTSWSLI is an ISA 3.0 instruction, so bail out on older subtargets.
  if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
    return false;

  assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");

  MachineOperand MOpSHMI = MI.getOperand(2);
  MachineOperand MOpMEMI = MI.getOperand(3);
  if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
    return false;

  // SH + ME == 63 is the RLDICR encoding of a plain shift-left-immediate
  // (sldi n == rldicr x, n, 63-n); only that form can fold into EXTSWSLI.
  uint64_t SHMI = MOpSHMI.getImm();
  uint64_t MEMI = MOpMEMI.getImm();
  if (SHMI + MEMI != 63)
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  if (!Register::isVirtualRegister(SrcReg))
    return false;

  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
  if (SrcMI->getOpcode() != PPC::EXTSW &&
      SrcMI->getOpcode() != PPC::EXTSW_32_64)
    return false;

  // If the register defined by extsw has more than one use, combination is
  // not needed (the extsw must stay anyway for the other users).
  if (!MRI->hasOneNonDBGUse(SrcReg))
    return false;

  assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
  assert(SrcMI->getOperand(1).isReg() &&
         "EXTSW's second operand should be a register");
  if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg()))
    return false;

  LLVM_DEBUG(dbgs() << "Combining pair: ");
  LLVM_DEBUG(SrcMI->dump());
  LLVM_DEBUG(MI.dump());

  // Build the fused extswsli just before MI, reading the EXTSW's source and
  // writing MI's destination.
  MachineInstr *NewInstr =
      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
              SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
                                               : TII->get(PPC::EXTSWSLI_32_64),
              MI.getOperand(0).getReg())
          .add(SrcMI->getOperand(1))
          .add(MOpSHMI);
  (void)NewInstr;

  LLVM_DEBUG(dbgs() << "TO: ");
  LLVM_DEBUG(NewInstr->dump());
  ++NumEXTSWAndSLDICombined;
  ToErase = &MI;
  // SrcMI, which is extsw, is of no use now, erase it.
  SrcMI->eraseFromParent();
  return true;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }