//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer.  It runs at the end of
// the SSA phases, following VSX swap removal.  A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here.  Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects: it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "MCTargetDesc/PPCPredicates.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

namespace llvm {
void initializePPCMIPeepholePass(PassRegistry&);
}

namespace {

struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Eliminate redundant compare instructions.
  bool eliminateRedundantCompare(void);

  // Find the "true" register represented by SrcReg (following chains
  // of copies and subreg_to_reg operations).
  unsigned lookThruCopyLike(unsigned SrcReg);

public:
  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(*MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};

// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  DEBUG(MF->dump());
}

// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
  bool Simplified = false;
  MachineInstr* ToErase = nullptr;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Ignore debug instructions.
      if (MI.isDebugValue())
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;

      case PPC::XXPERMDI: {
        // Perform simplifications of 2x64 vector swaps and splats.
        // A swap is identified by an immediate value of 2, and a splat
        // is identified by an immediate value of 0 or 3.
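        // For reference, when both source registers hold the same value vS,
        // the xxpermdi doubleword-select immediate picks:
        //   0 -> { vS[0], vS[0] }   (splat of doubleword 0)
        //   1 -> { vS[0], vS[1] }   (identity; nothing to simplify)
        //   2 -> { vS[1], vS[0] }   (swap)
        //   3 -> { vS[1], vS[1] }   (splat of doubleword 1)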
        int Immed = MI.getOperand(3).getImm();

        if (Immed != 1) {

          // For each of these simplifications, we need the two source
          // regs to match.  Unfortunately, MachineCSE ignores COPY and
          // SUBREG_TO_REG, so for example we can see
          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
          // We have to look through chains of COPY and SUBREG_TO_REG
          // to find the real source values for comparison.
          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());

          if (TrueReg1 == TrueReg2
              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
            unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;

            // If this is a splat fed by a splatting load, the splat is
            // redundant. Replace with a copy. This doesn't happen directly due
            // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
            // a load of a double to a vector of 64-bit integers.
            auto isConversionOfLoadAndSplat = [=]() -> bool {
              if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
                return false;
              unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
              if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
                MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
                if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
                  return true;
              }
              return false;
            };
            if (DefMI && (Immed == 0 || Immed == 3)) {
              if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
                DEBUG(dbgs()
                      << "Optimizing load-and-splat/splat "
                         "to load-and-splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            }

            // If this is a splat or a swap fed by another splat, we
            // can replace it with a copy.
            if (DefOpc == PPC::XXPERMDI) {
              unsigned FeedImmed = DefMI->getOperand(3).getImm();
              unsigned FeedReg1
                = lookThruCopyLike(DefMI->getOperand(1).getReg());
              unsigned FeedReg2
                = lookThruCopyLike(DefMI->getOperand(2).getReg());

              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
                DEBUG(dbgs()
                      << "Optimizing splat/swap or splat/splat "
                         "to splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }

              // If this is a splat fed by a swap, we can modify the splat
              // to splat the other value from the swap's input parameter.
              else if ((Immed == 0 || Immed == 3)
                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
                DEBUG(MI.dump());
                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
                MI.getOperand(3).setImm(3 - Immed);
                Simplified = true;
              }

              // If this is a swap fed by a swap, we can replace it
              // with a copy from the first swap's input.
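              // (Swapping the doublewords twice restores the original order,
              // so the second swap reduces to a copy of the first swap's
              // input.)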
              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(DefMI->getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
                       (DefMI->getOperand(2).getImm() == 0 ||
                        DefMI->getOperand(2).getImm() == 3)) {
              // Splat fed by another splat - switch the output of the first
              // and remove the second.
              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
              ToErase = &MI;
              Simplified = true;
              DEBUG(dbgs() << "Removing redundant splat: ");
              DEBUG(MI.dump());
            }
          }
        }
        break;
      }
      case PPC::VSPLTB:
      case PPC::VSPLTH:
      case PPC::XXSPLTW: {
        unsigned MyOpcode = MI.getOpcode();
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
        if (!DefMI)
          break;
        unsigned DefOpcode = DefMI->getOpcode();
        auto isConvertOfSplat = [=]() -> bool {
          if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
            return false;
          unsigned ConvReg = DefMI->getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
            return false;
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
                          Splt->getOpcode() == PPC::XXSPLTW);
        };
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS) ||
          (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
        // If the instruction[s] that feed this splat have already splat
        // the value, this splat is redundant.
        if (AlreadySplat) {
          DEBUG(dbgs() << "Changing redundant splat to a copy: ");
          DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(OpNo));
          ToErase = &MI;
          Simplified = true;
        }
        // Splat fed by a shift.  This usually arises when a value is aligned
        // into vector element zero before being splatted.  Only XXSPLTW is
        // handled here: its splat immediate (operand 2) is a word-element
        // index, which matches XXSLDWI's word-granular shift.  VSPLTB and
        // VSPLTH carry their immediate in operand 1 and index smaller
        // elements, so reading operand 2 as an immediate would be wrong
        // for them.
        if (DefOpcode == PPC::XXSLDWI && MyOpcode == PPC::XXSPLTW) {
          unsigned ShiftRes = DefMI->getOperand(0).getReg();
          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
          unsigned SplatImm = MI.getOperand(2).getImm();
          if (ShiftOp1 == ShiftOp2) {
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
              DEBUG(dbgs() << "Removing redundant shift: ");
              DEBUG(DefMI->dump());
              ToErase = DefMI;
            }
            Simplified = true;
            DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
                         << " to " << NewElem << " in instruction: ");
            DEBUG(MI.dump());
            MI.getOperand(1).setReg(ShiftOp1);
            MI.getOperand(2).setImm(NewElem);
          }
        }
        break;
      }
      case PPC::XVCVDPSP: {
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is
        // redundant.
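        // (XVCVDPSP itself rounds to single precision, so an FRSP applied to
        // the scalars that built its input vector repeats the rounding this
        // conversion already performs.)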
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);

        // This can occur when building a vector of single precision or integer
        // values.
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
          unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
          unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
          if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
              !TargetRegisterInfo::isVirtualRegister(DefsReg2))
            break;
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);

          if (!P1 || !P2)
            break;

          // Remove the passed FRSP instruction if it only feeds this MI and
          // set any uses of that FRSP (in this MI) to the source of the FRSP.
          auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
            if (RoundInstr->getOpcode() == PPC::FRSP &&
                MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
              Simplified = true;
              unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
              unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
              for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
                if (Use.getOperand(i).isReg() &&
                    Use.getOperand(i).getReg() == FRSPDefines)
                  Use.getOperand(i).setReg(ConvReg1);
              DEBUG(dbgs() << "Removing redundant FRSP:\n");
              DEBUG(RoundInstr->dump());
              DEBUG(dbgs() << "As it feeds instruction:\n");
              DEBUG(MI.dump());
              DEBUG(dbgs() << "Through instruction:\n");
              DEBUG(DefMI->dump());
              RoundInstr->eraseFromParent();
            }
          };

          // If the input to XVCVDPSP is a vector that was built (even
          // partially) out of FRSP's, the FRSP(s) can safely be removed
          // since this instruction performs the same operation.
          if (P1 != P2) {
            removeFRSPIfPossible(P1);
            removeFRSPIfPossible(P2);
            break;
          }
          removeFRSPIfPossible(P1);
        }
        break;
      }
      }
    }
    // If the last instruction was marked for elimination,
    // remove it now.
    if (ToErase) {
      ToErase->eraseFromParent();
      ToErase = nullptr;
    }
  }

  // Attempt to eliminate redundant compare instructions.
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}

// Helper functions for eliminateRedundantCompare.
static bool isEqOrNe(MachineInstr *BI) {
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
}

static bool isSupportedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
          opCode == PPC::CMPLW || opCode == PPC::CMPW ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
          opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
}

static bool is64bitCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
}

static bool isSignedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
          opCode == PPC::CMPDI || opCode == PPC::CMPWI);
}

static unsigned getSignedCmpOpCode(unsigned opCode) {
  if (opCode == PPC::CMPLD)  return PPC::CMPD;
  if (opCode == PPC::CMPLW)  return PPC::CMPW;
  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
  return opCode;
}

// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
// (LT x) to (LE x-1).
static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GE)
    return PPC::getPredicate(PPC::PRED_GT, PredHint);
  if (PredCond == PPC::PRED_LT)
    return PPC::getPredicate(PPC::PRED_LE, PredHint);

  return 0;
}

// We can increment immediate x in (GT x) by changing it to (GE x+1) or
// (LE x) to (LT x+1).
static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GT)
    return PPC::getPredicate(PPC::PRED_GE, PredHint);
  if (PredCond == PPC::PRED_LE)
    return PPC::getPredicate(PPC::PRED_LT, PredHint);

  return 0;
}

static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineRegisterInfo *MRI) {

  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
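      // (If the CR value had other users, rewriting or erasing its defining
      // compare during the merge below could change those users' results.)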
      unsigned CndReg = (*BII).getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      // We skip this BB if a physical register is used in the comparison.
      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  if (MBB.pred_size() != 1)
    return false;

  MachineBasicBlock *PredMBB = *MBB.pred_begin();
  if (isEligibleBB(MBB) && isEligibleBB(*PredMBB))
    return true;

  return false;
}

// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge compare instructions into one and make multiple
// conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
//   cmplwi r3, 0
//   beq    0, .LBB0_3
//   cmpwi  r3, -1
//   bgt    0, .LBB0_4
// So, before merging two compares, we need to modify these instructions as
//   cmpwi  r3, 0       ; cmplwi and cmpwi yield the same result for beq
//   beq    0, .LBB0_3
//   cmpwi  r3, 0       ; greater than -1 means greater than or equal to 0
//   bge    0, .LBB0_4

bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    // We only consider two basic blocks MBB1 and MBB2 if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - neither compare takes a physical register as an operand.
    if (!eligibleForCompareElimination(MBB2, MRI))
      continue;

    MachineBasicBlock *MBB1 = *MBB2.pred_begin();
    MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons.
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, an unsigned comparison is used for an equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.
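      // For example, a cmplwi feeding only beq/bne can be replaced by cmpwi,
      // since equality does not depend on signedness; ordering predicates
      // (lt/gt/le/ge) do, so other mismatched pairs are rejected.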
      if (isEqOrNe(BI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be the same to merge two comparisons.
      unsigned Cmp1Operand1 = CMPI1->getOperand(1).getReg();
      unsigned Cmp1Operand2 = CMPI1->getOperand(2).getReg();
      unsigned Cmp2Operand1 = CMPI2->getOperand(1).getReg();
      unsigned Cmp2Operand2 = CMPI2->getOperand(2).getReg();
      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be the same for both compare instructions.
      if (CMPI1->getOperand(1).getReg() != CMPI2->getOperand(1).getReg())
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If the immediates are not the same, we try to adjust by changing the
      // predicate; e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not the same.
      if (NewImm2 != NewImm1)
        continue;
    }

    DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(CMPI2->dump());
    DEBUG(BI2->dump());

    // We adjust the opcode, predicates, and immediate as determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    // We finally eliminate the compare instruction in MBB2.
    BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);
    CMPI2->eraseFromParent();

    DEBUG(dbgs() << "into a compare and two branches:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(BI2->dump());

    Simplified = true;
  }

  return Simplified;
}

// This is used to find the "true" source register for an
// XXPERMDI instruction, since MachineCSE does not handle the
// "copy-like" operations (Copy and SubregToReg).  Returns
// the original SrcReg unless it is the target of a copy-like
// operation, in which case we chain backwards through all
// such operations to the ultimate source register.  If a
// physical register is encountered, we stop the search.
unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {

  while (true) {

    MachineInstr *MI = MRI->getVRegDef(SrcReg);
    if (!MI->isCopyLike())
      return SrcReg;

    unsigned CopySrcReg;
    if (MI->isCopy())
      CopySrcReg = MI->getOperand(1).getReg();
    else {
      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
      CopySrcReg = MI->getOperand(2).getReg();
    }

    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
      return CopySrcReg;

    SrcReg = CopySrcReg;
  }
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }