//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer.  It runs at the end of
// the SSA phases, following VSX swap removal.  A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here.  Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects: it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "MCTargetDesc/PPCPredicates.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

STATISTIC(NumOptADDLIs, "Number of optimized ADD instructions fed by LI");

namespace llvm {
  void initializePPCMIPeepholePass(PassRegistry&);
}

namespace {

struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  MachineDominatorTree *MDT;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Eliminate redundant compare instructions.
  bool eliminateRedundantCompare(void);

  // Find the "true" register represented by SrcReg (following chains
  // of copies and subreg_to_reg operations).
  unsigned lookThruCopyLike(unsigned SrcReg);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(*MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};

// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  DEBUG(MF->dump());
}

static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
                                      MachineRegisterInfo *MRI) {
  assert(Op && "Invalid Operand!");
  if (!Op->isReg())
    return nullptr;

  unsigned Reg = Op->getReg();
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;

  return MRI->getVRegDef(Reg);
}
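
// For reference, a hedged sketch of the XXPERMDI immediate encoding the
// peepholes below rely on (illustrative only; see the ISA for the full
// DM field semantics).  With both source operands being the same
// register t:
//   XXPERMDI t, t, 0   ; splat of doubleword 0: { t[0], t[0] }
//   XXPERMDI t, t, 3   ; splat of doubleword 1: { t[1], t[1] }
//   XXPERMDI t, t, 2   ; swap:                  { t[1], t[0] }
// Consequently, a splat with immediate I in {0, 3} fed by a swap selects
// element (3 - I) of the swap's input, which is what the swap/splat fold
// below exploits.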
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
  bool Simplified = false;
  MachineInstr* ToErase = nullptr;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Ignore debug instructions.
      if (MI.isDebugValue())
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;

      case PPC::XXPERMDI: {
        // Perform simplifications of 2x64 vector swaps and splats.
        // A swap is identified by an immediate value of 2, and a splat
        // is identified by an immediate value of 0 or 3.
        int Immed = MI.getOperand(3).getImm();

        if (Immed != 1) {

          // For each of these simplifications, we need the two source
          // regs to match.  Unfortunately, MachineCSE ignores COPY and
          // SUBREG_TO_REG, so for example we can see
          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
          // We have to look through chains of COPY and SUBREG_TO_REG
          // to find the real source values for comparison.
          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());

          if (TrueReg1 == TrueReg2
              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
            unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;

            // If this is a splat fed by a splatting load, the splat is
            // redundant.  Replace with a copy.  This doesn't happen directly
            // due to code in PPCDAGToDAGISel.cpp, but it can happen when
            // converting a load of a double to a vector of 64-bit integers.
            auto isConversionOfLoadAndSplat = [=]() -> bool {
              if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
                return false;
              unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
              if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
                MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
                if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
                  return true;
              }
              return false;
            };
            if (DefMI && (Immed == 0 || Immed == 3)) {
              if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
                DEBUG(dbgs()
                      << "Optimizing load-and-splat/splat "
                         "to load-and-splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            }
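
            // For illustration, a hedged, hypothetical MIR sequence for the
            // fold above (vreg names invented):
            //   %v1 = LXVDSX ...           ; load and splat a doubleword
            //   %v2 = XXPERMDI %v1, %v1, 0 ; redundant re-splat
            // becomes
            //   %v2 = COPY %v1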

            // If this is a splat or a swap fed by another splat, we
            // can replace it with a copy.
            if (DefOpc == PPC::XXPERMDI) {
              unsigned FeedImmed = DefMI->getOperand(3).getImm();
              unsigned FeedReg1
                = lookThruCopyLike(DefMI->getOperand(1).getReg());
              unsigned FeedReg2
                = lookThruCopyLike(DefMI->getOperand(2).getReg());

              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
                DEBUG(dbgs()
                      << "Optimizing splat/swap or splat/splat "
                         "to splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }

              // If this is a splat fed by a swap, we can modify the splat
              // to splat the other value from the swap's input parameter.
              else if ((Immed == 0 || Immed == 3)
                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
                DEBUG(MI.dump());
                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
                MI.getOperand(3).setImm(3 - Immed);
                Simplified = true;
              }

              // If this is a swap fed by a swap, we can replace it
              // with a copy from the first swap's input.
              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(DefMI->getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
                       (DefMI->getOperand(2).getImm() == 0 ||
                        DefMI->getOperand(2).getImm() == 3)) {
              // Splat fed by another splat - switch the output of the first
              // and remove the second.
              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
              ToErase = &MI;
              Simplified = true;
              DEBUG(dbgs() << "Removing redundant splat: ");
              DEBUG(MI.dump());
            }
          }
        }
        break;
      }
      case PPC::VSPLTB:
      case PPC::VSPLTH:
      case PPC::XXSPLTW: {
        unsigned MyOpcode = MI.getOpcode();
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
        if (!DefMI)
          break;
        unsigned DefOpcode = DefMI->getOpcode();
        auto isConvertOfSplat = [=]() -> bool {
          if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
            return false;
          unsigned ConvReg = DefMI->getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
            return false;
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
                          Splt->getOpcode() == PPC::XXSPLTW);
        };
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS) ||
          (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
        // If the instruction[s] that feed this splat have already splat
        // the value, this splat is redundant.
        if (AlreadySplat) {
          DEBUG(dbgs() << "Changing redundant splat to a copy: ");
          DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(OpNo));
          ToErase = &MI;
          Simplified = true;
        }
        // Splat fed by a shift.  This usually occurs when we need to align
        // the value to splat into vector element zero.
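        // For illustration, a hedged sketch (invented vregs; XXSLDWI
        // shifts the concatenated sources left by words):
        //   %v1 = XXSLDWI %v0, %v0, 1
        //   %v2 = XXSPLTW %v1, 0
        // can splat the desired word of %v0 directly:
        //   %v2 = XXSPLTW %v0, 1
        // since word 0 of the shifted value is word (0 + 1) & 3 of %v0.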
        if (DefOpcode == PPC::XXSLDWI) {
          unsigned ShiftRes = DefMI->getOperand(0).getReg();
          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
          unsigned SplatImm = MI.getOperand(2).getImm();
          if (ShiftOp1 == ShiftOp2) {
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
              DEBUG(dbgs() << "Removing redundant shift: ");
              DEBUG(DefMI->dump());
              ToErase = DefMI;
            }
            Simplified = true;
            DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
                  " to " << NewElem << " in instruction: ");
            DEBUG(MI.dump());
            MI.getOperand(1).setReg(ShiftOp1);
            MI.getOperand(2).setImm(NewElem);
          }
        }
        break;
      }
      case PPC::XVCVDPSP: {
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is
        // redundant.
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);

        // This can occur when building a vector of single precision or
        // integer values.
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
          unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
          unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
          if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
              !TargetRegisterInfo::isVirtualRegister(DefsReg2))
            break;
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);

          if (!P1 || !P2)
            break;

          // Remove the passed FRSP instruction if it only feeds this MI and
          // set any uses of that FRSP (in this MI) to the source of the FRSP.
          auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
            if (RoundInstr->getOpcode() == PPC::FRSP &&
                MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
              Simplified = true;
              unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
              unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
              for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
                if (Use.getOperand(i).isReg() &&
                    Use.getOperand(i).getReg() == FRSPDefines)
                  Use.getOperand(i).setReg(ConvReg1);
              DEBUG(dbgs() << "Removing redundant FRSP:\n");
              DEBUG(RoundInstr->dump());
              DEBUG(dbgs() << "As it feeds instruction:\n");
              DEBUG(MI.dump());
              DEBUG(dbgs() << "Through instruction:\n");
              DEBUG(DefMI->dump());
              RoundInstr->eraseFromParent();
            }
          };

          // If the input to XVCVDPSP is a vector that was built (even
          // partially) out of FRSP's, the FRSP(s) can safely be removed
          // since this instruction performs the same operation.
          if (P1 != P2) {
            removeFRSPIfPossible(P1);
            removeFRSPIfPossible(P2);
            break;
          }
          removeFRSPIfPossible(P1);
        }
        break;
      }
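
      // For illustration, a hedged sketch of the XVCVDPSP fold above
      // (invented vregs; in practice the values flow through copy-like
      // instructions, hence lookThruCopyLike):
      //   %f1 = FRSP %f0              ; round to single precision
      //   %v1 = XXPERMDI %f1, %f2, 0  ; build a vector (sole use of %f1)
      //   %v2 = XVCVDPSP %v1          ; converts to single precision anyway
      // The FRSP is redundant, so its use is rewired to %f0 and it is
      // erased.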

      // TODO: Any instruction that has an immediate form fed only by a PHI
      // whose operands are all load immediate can be folded away.  We
      // currently do this for ADD instructions, but should expand it to
      // arithmetic and binary instructions with immediate forms in the
      // future.
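      // For illustration, a hedged sketch of the transformation handled
      // below (invented vregs and blocks), where %v0 dominates both LIs:
      //   %v2 = LI 4                          ; in BB#1
      //   %v3 = LI 8                          ; in BB#2
      //   %v4 = PHI %v2, <BB#1>, %v3, <BB#2>
      //   %v5 = ADD4 %v4, %v0
      // becomes
      //   %v2 = ADDI %v0, 4                   ; in BB#1
      //   %v3 = ADDI %v0, 8                   ; in BB#2
      //   %v4 = PHI %v2, <BB#1>, %v3, <BB#2>
      //   %v5 = COPY %v4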
      case PPC::ADD4:
      case PPC::ADD8: {
        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);

          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
        };

        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
                                            MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          assert(DominatorOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);

          // Note: the vregs only show up at the odd indices of the PHI node;
          // the even indices hold the predecessor BB info.
          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
            MachineInstr *LiMI =
                getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
            if (!LiMI ||
                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
                || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
                !MDT->dominates(DefDomMI, LiMI))
              return false;
          }

          return true;
        };

        MachineOperand Op1 = MI.getOperand(1);
        MachineOperand Op2 = MI.getOperand(2);
        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
          std::swap(Op1, Op2);
        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
          break; // We don't have an ADD fed by LIs that can be transformed.

        // Now we know that Op1 is the PHI node and Op2 is the dominator.
        unsigned DominatorReg = Op2.getReg();

        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
                                           ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                           : &PPC::GPRC_and_GPRC_NOR0RegClass;
        MRI->setRegClass(DominatorReg, TRC);

        // Replace the LIs with ADDIs.
        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
          DEBUG(dbgs() << "Optimizing LI to ADDI: ");
          DEBUG(LiMI->dump());

          // There could be repeated registers in the PHI, e.g.: %vreg1<def> =
          // PHI %vreg6, <BB#2>, %vreg8, <BB#3>, %vreg8, <BB#6>; so if we've
          // already replaced the def instruction, skip.
          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
            continue;

          assert((LiMI->getOpcode() == PPC::LI ||
                  LiMI->getOpcode() == PPC::LI8) &&
                 "Invalid Opcode!");
          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
          LiMI->RemoveOperand(1);                    // remove the imm of LI
          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
                                                              : PPC::ADDI8));
          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
              .addReg(DominatorReg)
              .addImm(LiImm); // restore the imm of LI
          DEBUG(LiMI->dump());
        }

        // Replace the ADD with a COPY.
        DEBUG(dbgs() << "Optimizing ADD to COPY: ");
        DEBUG(MI.dump());
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                MI.getOperand(0).getReg())
            .add(Op1);
        ToErase = &MI;
        Simplified = true;
        NumOptADDLIs++;
        break;
      }
      }
    }

    // If the last instruction was marked for elimination,
    // remove it now.
    if (ToErase) {
      ToErase->eraseFromParent();
      ToErase = nullptr;
    }
  }

  // Try to eliminate redundant compare instructions.
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}

// Helper functions for eliminateRedundantCompare.
static bool isEqOrNe(MachineInstr *BI) {
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
}

static bool isSupportedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
          opCode == PPC::CMPLW || opCode == PPC::CMPW ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
          opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
}

static bool is64bitCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
}

static bool isSignedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
          opCode == PPC::CMPDI || opCode == PPC::CMPWI);
}

static unsigned getSignedCmpOpCode(unsigned opCode) {
  if (opCode == PPC::CMPLD)  return PPC::CMPD;
  if (opCode == PPC::CMPLW)  return PPC::CMPW;
  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
  return opCode;
}

// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
// (LT x) to (LE x-1).
static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GE)
    return PPC::getPredicate(PPC::PRED_GT, PredHint);
  if (PredCond == PPC::PRED_LT)
    return PPC::getPredicate(PPC::PRED_LE, PredHint);

  return 0;
}

// We can increment immediate x in (GT x) by changing it to (GE x+1) or
// (LE x) to (LT x+1).
static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GT)
    return PPC::getPredicate(PPC::PRED_GE, PredHint);
  if (PredCond == PPC::PRED_LE)
    return PPC::getPredicate(PPC::PRED_LT, PredHint);

  return 0;
}
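
// For example (a hedged sketch): with a signed compare,
//   cmpwi r3, 4 ; bgt
// tests r3 > 4, i.e. r3 >= 5, so it can be rewritten as
//   cmpwi r3, 5 ; bge
// The early returns above keep the adjusted immediate within the
// instruction's 16-bit immediate range.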

// This takes a PHI node and returns a register value for the specified BB.
static unsigned getIncomingRegForBlock(MachineInstr *Phi,
                                       MachineBasicBlock *MBB) {
  for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
    MachineOperand &MO = Phi->getOperand(I);
    if (MO.getMBB() == MBB)
      return Phi->getOperand(I-1).getReg();
  }
  llvm_unreachable("invalid src basic block for this Phi node\n");
  return 0;
}

// This function tracks the source of the register through register copies.
// If BB1 and BB2 are non-NULL, we also track the PHI instruction in BB2,
// assuming that control flows from BB1 into BB2.
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
                           MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
  unsigned SrcReg = Reg;
  while (1) {
    unsigned NextReg = SrcReg;
    MachineInstr *Inst = MRI->getVRegDef(SrcReg);
    if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
      NextReg = getIncomingRegForBlock(Inst, BB1);
      // We track through PHI only once to avoid an infinite loop.
      BB1 = nullptr;
    }
    else if (Inst->isFullCopy())
      NextReg = Inst->getOperand(1).getReg();
    if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
      break;
    SrcReg = NextReg;
  }
  return SrcReg;
}
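
// For illustration, a hedged sketch (invented vregs) with control flowing
// from BB1 into BB2:
//   %v5 = PHI %v3, <BB1>, %v4, <BB3>   ; in BB2
//   %v6 = COPY %v5
// getSrcVReg(%v6, BB1, BB2, MRI) looks through the copy and one level of
// PHI and returns %v3, the value that arrives from BB1.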

static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      unsigned CndReg = (*BII).getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      // We skip this BB if a physical register is used in the comparison.
      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };
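
  // For illustration, a hedged sketch of an eligible block (invented
  // vregs): it ends with
  //   %c1 = CMPWI %v1, 0
  //   BCC <pred>, %c1, <target>
  // where %c1 has exactly one non-debug use and the compare reads only
  // virtual registers.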

  // If this BB has more than one successor, we can create a new BB and
  // move the compare instruction into the new BB.
  // So far, we do not move compare instructions to a BB having multiple
  // successors, to avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for the partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Hence we can proceed as is.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(Pred1MBB, Pred2MBB);
    }
    else return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}

// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge compare instructions into one and make multiple
// conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
//   cmplwi r3, 0
//   beq    0, .LBB0_3
//   cmpwi  r3, -1
//   bgt    0, .LBB0_4
// So, before merging two compares, we need to modify these instructions as
// follows:
//   cmpwi  r3, 0  ; cmplwi and cmpwi yield same result for beq
//   beq    0, .LBB0_3
//   cmpwi  r3, 0  ; greater than -1 means greater than or equal to 0
//   bge    0, .LBB0_4

bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For the fully redundant case, we select two basic blocks MBB1 and
    // MBB2 as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - the compare does not take a physical register as an operand in
    //   either MBB.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp to
    // nullptr.
    //
    // As the partially redundant case, we additionally handle the case
    // where MBB2 has one additional predecessor, which has only one
    // successor (MBB2).  In this case, we move the compare instruction
    // originally in MBB2 into MBBtoMoveCmp.  This partially redundant case
    // typically appears when compiling a while loop; here, MBBtoMoveCmp is
    // the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //    |    \                     (w/ 1 succ) \     |    \
    //    |     \                                 \    |     \
    //    |                                        \   |
    //   --------                                  --------
    //   | MBB2 | (w/ 1 pred                       | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                      -------- and 2 succ)
    //    |    \                                    |    \
    //    |     \                                   |     \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons.
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, an unsigned comparison is used for an equality check,
      // but we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.
      if (isEqOrNe(BI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be the same to merge two comparisons.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in the other compare instruction.
        SwapOperands = true;
      }
      else continue;
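
      // For illustration, a hedged sketch of the reversed-operand case
      // (invented registers):
      //   CMPLW %v1, %v2 ; blt     and     CMPLW %v2, %v1 ; bgt
      // test the same condition, so the second branch can reuse the first
      // compare once its predicate is swapped from bgt to blt.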
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be the same for the two compare
      // instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If the immediates are not the same, we try to adjust by changing
      // the predicate; e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not the same.
      if (NewImm2 != NewImm1)
        continue;
    }

    DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(CMPI2->dump());
    DEBUG(BI2->dump());

    // We adjust the opcode, predicates, and immediates as determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }
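
    // In the partially redundant case we end up with a shape like the
    // following hedged sketch (invented vregs): the compare is moved into
    // the extra predecessor, and a PHI merges the two condition registers
    // for the branch in MBB2.
    //   MBBtoMoveCmp:  %c2 = CMPWI %v1, 0          ; moved from MBB2
    //   MBB1:          %c1 = CMPWI %v1, 0
    //                  BCC <pred1>, %c1, ...
    //   MBB2:          %c3 = PHI %c1, <MBB1>, %c2, <MBBtoMoveCmp>
    //                  BCC <pred2>, %c3, ...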

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle the partially redundant case.
      if (SwapOperands) {
        unsigned Op1 = CMPI2->getOperand(1).getReg();
        unsigned Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      DebugLoc DL = CMPI2->getDebugLoc();
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate the compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    DEBUG(dbgs() << "into a compare and two branches:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      DEBUG(dbgs() << "The following compare is moved into BB#" <<
            MBBtoMoveCmp->getNumber() << " to handle partial redundancy.\n");
      DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}

// This is used to find the "true" source register for an
// XXPERMDI instruction, since MachineCSE does not handle the
// "copy-like" operations (Copy and SubregToReg).  Returns
// the original SrcReg unless it is the target of a copy-like
// operation, in which case we chain backwards through all
// such operations to the ultimate source register.  If a
// physical register is encountered, we stop the search.
unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {

  while (true) {

    MachineInstr *MI = MRI->getVRegDef(SrcReg);
    if (!MI->isCopyLike())
      return SrcReg;

    unsigned CopySrcReg;
    if (MI->isCopy())
      CopySrcReg = MI->getOperand(1).getReg();
    else {
      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
      CopySrcReg = MI->getOperand(2).getReg();
    }

    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
      return CopySrcReg;

    SrcReg = CopySrcReg;
  }
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }