1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===---------------------------------------------------------------------===// 9 // 10 // This pass performs peephole optimizations to clean up ugly code 11 // sequences at the MachineInstruction layer. It runs at the end of 12 // the SSA phases, following VSX swap removal. A pass of dead code 13 // elimination follows this one for quick clean-up of any dead 14 // instructions introduced here. Although we could do this as callbacks 15 // from the generic peephole pass, this would have a couple of bad 16 // effects: it might remove optimization opportunities for VSX swap 17 // removal, and it would miss cleanups made possible following VSX 18 // swap removal. 19 // 20 //===---------------------------------------------------------------------===// 21 22 #include "PPCInstrInfo.h" 23 #include "PPC.h" 24 #include "PPCInstrBuilder.h" 25 #include "PPCTargetMachine.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/Support/Debug.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "ppc-mi-peepholes" 34 35 namespace llvm { 36 void initializePPCMIPeepholePass(PassRegistry&); 37 } 38 39 namespace { 40 41 struct PPCMIPeephole : public MachineFunctionPass { 42 43 static char ID; 44 const PPCInstrInfo *TII; 45 MachineFunction *MF; 46 MachineRegisterInfo *MRI; 47 48 PPCMIPeephole() : MachineFunctionPass(ID) { 49 initializePPCMIPeepholePass(*PassRegistry::getPassRegistry()); 50 } 51 52 private: 53 // Initialize class variables. 54 void initialize(MachineFunction &MFParm); 55 56 // Perform peepholes. 57 bool simplifyCode(void); 58 59 // Find the "true" register represented by SrcReg (following chains 60 // of copies and subreg_to_reg operations). 61 unsigned lookThruCopyLike(unsigned SrcReg); 62 63 public: 64 // Main entry point for this pass. 65 bool runOnMachineFunction(MachineFunction &MF) override { 66 if (skipFunction(*MF.getFunction())) 67 return false; 68 initialize(MF); 69 return simplifyCode(); 70 } 71 }; 72 73 // Initialize class variables. 74 void PPCMIPeephole::initialize(MachineFunction &MFParm) { 75 MF = &MFParm; 76 MRI = &MF->getRegInfo(); 77 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 78 DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n"); 79 DEBUG(MF->dump()); 80 } 81 82 // Perform peephole optimizations. 83 bool PPCMIPeephole::simplifyCode(void) { 84 bool Simplified = false; 85 MachineInstr* ToErase = nullptr; 86 87 for (MachineBasicBlock &MBB : *MF) { 88 for (MachineInstr &MI : MBB) { 89 90 // If the previous instruction was marked for elimination, 91 // remove it now. 92 if (ToErase) { 93 ToErase->eraseFromParent(); 94 ToErase = nullptr; 95 } 96 97 // Ignore debug instructions. 98 if (MI.isDebugValue()) 99 continue; 100 101 // Per-opcode peepholes. 102 switch (MI.getOpcode()) { 103 104 default: 105 break; 106 107 case PPC::XXPERMDI: { 108 // Perform simplifications of 2x64 vector swaps and splats. 109 // A swap is identified by an immediate value of 2, and a splat 110 // is identified by an immediate value of 0 or 3. 111 int Immed = MI.getOperand(3).getImm(); 112 113 if (Immed != 1) { 114 115 // For each of these simplifications, we need the two source 116 // regs to match. Unfortunately, MachineCSE ignores COPY and 117 // SUBREG_TO_REG, so for example we can see 118 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. 119 // We have to look through chains of COPY and SUBREG_TO_REG 120 // to find the real source values for comparison. 121 unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg()); 122 unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg()); 123 124 if (TrueReg1 == TrueReg2 125 && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { 126 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); 127 unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0; 128 129 // If this is a splat fed by a splatting load, the splat is 130 // redundant. Replace with a copy. This doesn't happen directly due 131 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting 132 // a load of a double to a vector of 64-bit integers. 133 auto isConversionOfLoadAndSplat = [=]() -> bool { 134 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS) 135 return false; 136 unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg()); 137 if (TargetRegisterInfo::isVirtualRegister(DefReg)) { 138 MachineInstr *LoadMI = MRI->getVRegDef(DefReg); 139 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) 140 return true; 141 } 142 return false; 143 }; 144 if (DefMI && (Immed == 0 || Immed == 3)) { 145 if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) { 146 DEBUG(dbgs() 147 << "Optimizing load-and-splat/splat " 148 "to load-and-splat/copy: "); 149 DEBUG(MI.dump()); 150 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 151 MI.getOperand(0).getReg()) 152 .add(MI.getOperand(1)); 153 ToErase = &MI; 154 Simplified = true; 155 } 156 } 157 158 // If this is a splat or a swap fed by another splat, we 159 // can replace it with a copy. 160 if (DefOpc == PPC::XXPERMDI) { 161 unsigned FeedImmed = DefMI->getOperand(3).getImm(); 162 unsigned FeedReg1 163 = lookThruCopyLike(DefMI->getOperand(1).getReg()); 164 unsigned FeedReg2 165 = lookThruCopyLike(DefMI->getOperand(2).getReg()); 166 167 if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { 168 DEBUG(dbgs() 169 << "Optimizing splat/swap or splat/splat " 170 "to splat/copy: "); 171 DEBUG(MI.dump()); 172 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 173 MI.getOperand(0).getReg()) 174 .add(MI.getOperand(1)); 175 ToErase = &MI; 176 Simplified = true; 177 } 178 179 // If this is a splat fed by a swap, we can simplify modify 180 // the splat to splat the other value from the swap's input 181 // parameter. 182 else if ((Immed == 0 || Immed == 3) 183 && FeedImmed == 2 && FeedReg1 == FeedReg2) { 184 DEBUG(dbgs() << "Optimizing swap/splat => splat: "); 185 DEBUG(MI.dump()); 186 MI.getOperand(1).setReg(DefMI->getOperand(1).getReg()); 187 MI.getOperand(2).setReg(DefMI->getOperand(2).getReg()); 188 MI.getOperand(3).setImm(3 - Immed); 189 Simplified = true; 190 } 191 192 // If this is a swap fed by a swap, we can replace it 193 // with a copy from the first swap's input. 194 else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { 195 DEBUG(dbgs() << "Optimizing swap/swap => copy: "); 196 DEBUG(MI.dump()); 197 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 198 MI.getOperand(0).getReg()) 199 .add(DefMI->getOperand(1)); 200 ToErase = &MI; 201 Simplified = true; 202 } 203 } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs && 204 (DefMI->getOperand(2).getImm() == 0 || 205 DefMI->getOperand(2).getImm() == 3)) { 206 // Splat fed by another splat - switch the output of the first 207 // and remove the second. 208 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); 209 ToErase = &MI; 210 Simplified = true; 211 DEBUG(dbgs() << "Removing redundant splat: "); 212 DEBUG(MI.dump()); 213 } 214 } 215 } 216 break; 217 } 218 case PPC::VSPLTB: 219 case PPC::VSPLTH: 220 case PPC::XXSPLTW: { 221 unsigned MyOpcode = MI.getOpcode(); 222 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2; 223 unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg()); 224 if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) 225 break; 226 MachineInstr *DefMI = MRI->getVRegDef(TrueReg); 227 if (!DefMI) 228 break; 229 unsigned DefOpcode = DefMI->getOpcode(); 230 auto isConvertOfSplat = [=]() -> bool { 231 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS) 232 return false; 233 unsigned ConvReg = DefMI->getOperand(1).getReg(); 234 if (!TargetRegisterInfo::isVirtualRegister(ConvReg)) 235 return false; 236 MachineInstr *Splt = MRI->getVRegDef(ConvReg); 237 return Splt && (Splt->getOpcode() == PPC::LXVWSX || 238 Splt->getOpcode() == PPC::XXSPLTW); 239 }; 240 bool AlreadySplat = (MyOpcode == DefOpcode) || 241 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) || 242 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) || 243 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) || 244 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) || 245 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)|| 246 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat()); 247 // If the instruction[s] that feed this splat have already splat 248 // the value, this splat is redundant. 249 if (AlreadySplat) { 250 DEBUG(dbgs() << "Changing redundant splat to a copy: "); 251 DEBUG(MI.dump()); 252 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), 253 MI.getOperand(0).getReg()) 254 .add(MI.getOperand(OpNo)); 255 ToErase = &MI; 256 Simplified = true; 257 } 258 // Splat fed by a shift. Usually when we align value to splat into 259 // vector element zero. 260 if (DefOpcode == PPC::XXSLDWI) { 261 unsigned ShiftRes = DefMI->getOperand(0).getReg(); 262 unsigned ShiftOp1 = DefMI->getOperand(1).getReg(); 263 unsigned ShiftOp2 = DefMI->getOperand(2).getReg(); 264 unsigned ShiftImm = DefMI->getOperand(3).getImm(); 265 unsigned SplatImm = MI.getOperand(2).getImm(); 266 if (ShiftOp1 == ShiftOp2) { 267 unsigned NewElem = (SplatImm + ShiftImm) & 0x3; 268 if (MRI->hasOneNonDBGUse(ShiftRes)) { 269 DEBUG(dbgs() << "Removing redundant shift: "); 270 DEBUG(DefMI->dump()); 271 ToErase = DefMI; 272 } 273 Simplified = true; 274 DEBUG(dbgs() << "Changing splat immediate from " << SplatImm << 275 " to " << NewElem << " in instruction: "); 276 DEBUG(MI.dump()); 277 MI.getOperand(1).setReg(ShiftOp1); 278 MI.getOperand(2).setImm(NewElem); 279 } 280 } 281 break; 282 } 283 case PPC::XVCVDPSP: { 284 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant. 285 unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg()); 286 if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) 287 break; 288 MachineInstr *DefMI = MRI->getVRegDef(TrueReg); 289 290 // This can occur when building a vector of single precision or integer 291 // values. 292 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) { 293 unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg()); 294 unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg()); 295 if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) || 296 !TargetRegisterInfo::isVirtualRegister(DefsReg2)) 297 break; 298 MachineInstr *P1 = MRI->getVRegDef(DefsReg1); 299 MachineInstr *P2 = MRI->getVRegDef(DefsReg2); 300 301 if (!P1 || !P2) 302 break; 303 304 // Remove the passed FRSP instruction if it only feeds this MI and 305 // set any uses of that FRSP (in this MI) to the source of the FRSP. 306 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) { 307 if (RoundInstr->getOpcode() == PPC::FRSP && 308 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) { 309 Simplified = true; 310 unsigned ConvReg1 = RoundInstr->getOperand(1).getReg(); 311 unsigned FRSPDefines = RoundInstr->getOperand(0).getReg(); 312 MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines)); 313 for (int i = 0, e = Use.getNumOperands(); i < e; ++i) 314 if (Use.getOperand(i).isReg() && 315 Use.getOperand(i).getReg() == FRSPDefines) 316 Use.getOperand(i).setReg(ConvReg1); 317 DEBUG(dbgs() << "Removing redundant FRSP:\n"); 318 DEBUG(RoundInstr->dump()); 319 DEBUG(dbgs() << "As it feeds instruction:\n"); 320 DEBUG(MI.dump()); 321 DEBUG(dbgs() << "Through instruction:\n"); 322 DEBUG(DefMI->dump()); 323 RoundInstr->eraseFromParent(); 324 } 325 }; 326 327 // If the input to XVCVDPSP is a vector that was built (even 328 // partially) out of FRSP's, the FRSP(s) can safely be removed 329 // since this instruction performs the same operation. 330 if (P1 != P2) { 331 removeFRSPIfPossible(P1); 332 removeFRSPIfPossible(P2); 333 break; 334 } 335 removeFRSPIfPossible(P1); 336 } 337 break; 338 } 339 } 340 } 341 // If the last instruction was marked for elimination, 342 // remove it now. 343 if (ToErase) { 344 ToErase->eraseFromParent(); 345 ToErase = nullptr; 346 } 347 } 348 349 return Simplified; 350 } 351 352 // This is used to find the "true" source register for an 353 // XXPERMDI instruction, since MachineCSE does not handle the 354 // "copy-like" operations (Copy and SubregToReg). Returns 355 // the original SrcReg unless it is the target of a copy-like 356 // operation, in which case we chain backwards through all 357 // such operations to the ultimate source register. If a 358 // physical register is encountered, we stop the search. 359 unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) { 360 361 while (true) { 362 363 MachineInstr *MI = MRI->getVRegDef(SrcReg); 364 if (!MI->isCopyLike()) 365 return SrcReg; 366 367 unsigned CopySrcReg; 368 if (MI->isCopy()) 369 CopySrcReg = MI->getOperand(1).getReg(); 370 else { 371 assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike"); 372 CopySrcReg = MI->getOperand(2).getReg(); 373 } 374 375 if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) 376 return CopySrcReg; 377 378 SrcReg = CopySrcReg; 379 } 380 } 381 382 } // end default namespace 383 384 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE, 385 "PowerPC MI Peephole Optimization", false, false) 386 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE, 387 "PowerPC MI Peephole Optimization", false, false) 388 389 char PPCMIPeephole::ID = 0; 390 FunctionPass* 391 llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); } 392 393