xref: /llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (revision 7c842fadf100b2ed160986e40a9a68a0613df256)
1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This pass performs peephole optimizations to clean up ugly code
10 // sequences at the MachineInstruction layer.  It runs at the end of
11 // the SSA phases, following VSX swap removal.  A pass of dead code
12 // elimination follows this one for quick clean-up of any dead
13 // instructions introduced here.  Although we could do this as callbacks
14 // from the generic peephole pass, this would have a couple of bad
15 // effects:  it might remove optimization opportunities for VSX swap
16 // removal, and it would miss cleanups made possible following VSX
17 // swap removal.
18 //
19 //===---------------------------------------------------------------------===//
20 
21 #include "PPC.h"
22 #include "PPCInstrBuilder.h"
23 #include "PPCInstrInfo.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/Support/Debug.h"
31 #include "MCTargetDesc/PPCPredicates.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "ppc-mi-peepholes"
36 
// Counters reported through the LLVM statistics infrastructure (-stats);
// each corresponds to one of the peepholes performed below.
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");
STATISTIC(NumRotatesCollapsed,
          "Number of pairs of rotate left, clear left/right collapsed");

// Command-line knobs controlling the individual peepholes.

// Whether the reg-reg -> reg-imm conversion iterates to a fixed point
// (one conversion may expose another) or makes a single pass.
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

// The extension-elimination peepholes default to off.
static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);
71 
72 namespace {
73 
// MachineFunction pass performing PowerPC-specific MI-level peephole
// cleanups.  Per-function state (MF, MRI, TII, MDT) is cached by
// initialize() at the start of each runOnMachineFunction invocation.
struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  MachineDominatorTree *MDT;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Helpers invoked from simplifyCode() to remove redundant compares and
  // redundant TOC-save instructions, and to maintain the TOC-save map.
  bool eliminateRedundantCompare(void);
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                      MachineInstr *MI);

public:

  // Requires (and preserves) the dominator tree, which UpdateTOCSaves and
  // the ADD-fed-by-LI peephole consult.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};
116 
117 // Initialize class variables.
118 void PPCMIPeephole::initialize(MachineFunction &MFParm) {
119   MF = &MFParm;
120   MRI = &MF->getRegInfo();
121   MDT = &getAnalysis<MachineDominatorTree>();
122   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
123   LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
124   LLVM_DEBUG(MF->dump());
125 }
126 
127 static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
128                                       MachineRegisterInfo *MRI) {
129   assert(Op && "Invalid Operand!");
130   if (!Op->isReg())
131     return nullptr;
132 
133   unsigned Reg = Op->getReg();
134   if (!TargetRegisterInfo::isVirtualRegister(Reg))
135     return nullptr;
136 
137   return MRI->getVRegDef(Reg);
138 }
139 
140 // This function returns number of known zero bits in output of MI
141 // starting from the most significant bit.
142 static unsigned
143 getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
144   unsigned Opcode = MI->getOpcode();
145   if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
146       Opcode == PPC::RLDCL  || Opcode == PPC::RLDCLo)
147     return MI->getOperand(3).getImm();
148 
149   if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
150        MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
151     return MI->getOperand(3).getImm();
152 
153   if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINMo ||
154        Opcode == PPC::RLWNM   || Opcode == PPC::RLWNMo  ||
155        Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
156        MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
157     return 32 + MI->getOperand(3).getImm();
158 
159   if (Opcode == PPC::ANDIo) {
160     uint16_t Imm = MI->getOperand(2).getImm();
161     return 48 + countLeadingZeros(Imm);
162   }
163 
164   if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZWo ||
165       Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZWo ||
166       Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
167     // The result ranges from 0 to 32.
168     return 58;
169 
170   if (Opcode == PPC::CNTLZD  || Opcode == PPC::CNTLZDo ||
171       Opcode == PPC::CNTTZD  || Opcode == PPC::CNTTZDo)
172     // The result ranges from 0 to 64.
173     return 57;
174 
175   if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
176       Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
177       Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
178       Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
179     return 48;
180 
181   if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
182       Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
183       Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
184       Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
185     return 56;
186 
187   if (TII->isZeroExtended(*MI))
188     return 32;
189 
190   return 0;
191 }
192 
193 // This function maintains a map for the pairs <TOC Save Instr, Keep>
194 // Each time a new TOC save is encountered, it checks if any of the existing
195 // ones are dominated by the new one. If so, it marks the existing one as
196 // redundant by setting it's entry in the map as false. It then adds the new
197 // instruction to the map with either true or false depending on if any
198 // existing instructions dominated the new one.
199 void PPCMIPeephole::UpdateTOCSaves(
200   std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
201   assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
202   bool Keep = true;
203   for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
204     MachineInstr *CurrInst = It->first;
205     // If new instruction dominates an existing one, mark existing one as
206     // redundant.
207     if (It->second && MDT->dominates(MI, CurrInst))
208       It->second = false;
209     // Check if the new instruction is redundant.
210     if (MDT->dominates(CurrInst, MI)) {
211       Keep = false;
212       break;
213     }
214   }
215   // Add new instruction to map.
216   TOCSaves[MI] = Keep;
217 }
218 
219 // Perform peephole optimizations.
220 bool PPCMIPeephole::simplifyCode(void) {
221   bool Simplified = false;
222   MachineInstr* ToErase = nullptr;
223   std::map<MachineInstr *, bool> TOCSaves;
224   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
225   NumFunctionsEnteredInMIPeephole++;
226   if (ConvertRegReg) {
227     // Fixed-point conversion of reg/reg instructions fed by load-immediate
228     // into reg/imm instructions. FIXME: This is expensive, control it with
229     // an option.
230     bool SomethingChanged = false;
231     do {
232       NumFixedPointIterations++;
233       SomethingChanged = false;
234       for (MachineBasicBlock &MBB : *MF) {
235         for (MachineInstr &MI : MBB) {
236           if (MI.isDebugInstr())
237             continue;
238 
239           if (TII->convertToImmediateForm(MI)) {
240             // We don't erase anything in case the def has other uses. Let DCE
241             // remove it if it can be removed.
242             LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
243             LLVM_DEBUG(MI.dump());
244             NumConvertedToImmediateForm++;
245             SomethingChanged = true;
246             Simplified = true;
247             continue;
248           }
249         }
250       }
251     } while (SomethingChanged && FixedPointRegToImm);
252   }
253 
254   for (MachineBasicBlock &MBB : *MF) {
255     for (MachineInstr &MI : MBB) {
256 
257       // If the previous instruction was marked for elimination,
258       // remove it now.
259       if (ToErase) {
260         ToErase->eraseFromParent();
261         ToErase = nullptr;
262       }
263 
264       // Ignore debug instructions.
265       if (MI.isDebugInstr())
266         continue;
267 
268       // Per-opcode peepholes.
269       switch (MI.getOpcode()) {
270 
271       default:
272         break;
273 
274       case PPC::STD: {
275         MachineFrameInfo &MFI = MF->getFrameInfo();
276         if (MFI.hasVarSizedObjects() ||
277             !MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
278           break;
279         // When encountering a TOC save instruction, call UpdateTOCSaves
280         // to add it to the TOCSaves map and mark any existing TOC saves
281         // it dominates as redundant.
282         if (TII->isTOCSaveMI(MI))
283           UpdateTOCSaves(TOCSaves, &MI);
284         break;
285       }
286       case PPC::XXPERMDI: {
287         // Perform simplifications of 2x64 vector swaps and splats.
288         // A swap is identified by an immediate value of 2, and a splat
289         // is identified by an immediate value of 0 or 3.
290         int Immed = MI.getOperand(3).getImm();
291 
292         if (Immed != 1) {
293 
294           // For each of these simplifications, we need the two source
295           // regs to match.  Unfortunately, MachineCSE ignores COPY and
296           // SUBREG_TO_REG, so for example we can see
297           //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
298           // We have to look through chains of COPY and SUBREG_TO_REG
299           // to find the real source values for comparison.
300           unsigned TrueReg1 =
301             TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
302           unsigned TrueReg2 =
303             TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
304 
305           if (TrueReg1 == TrueReg2
306               && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
307             MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
308             unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
309 
310             // If this is a splat fed by a splatting load, the splat is
311             // redundant. Replace with a copy. This doesn't happen directly due
312             // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
313             // a load of a double to a vector of 64-bit integers.
314             auto isConversionOfLoadAndSplat = [=]() -> bool {
315               if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
316                 return false;
317               unsigned DefReg =
318                 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
319               if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
320                 MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
321                 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
322                   return true;
323               }
324               return false;
325             };
326             if (DefMI && (Immed == 0 || Immed == 3)) {
327               if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
328                 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
329                                      "to load-and-splat/copy: ");
330                 LLVM_DEBUG(MI.dump());
331                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
332                         MI.getOperand(0).getReg())
333                     .add(MI.getOperand(1));
334                 ToErase = &MI;
335                 Simplified = true;
336               }
337             }
338 
339             // If this is a splat or a swap fed by another splat, we
340             // can replace it with a copy.
341             if (DefOpc == PPC::XXPERMDI) {
342               unsigned FeedImmed = DefMI->getOperand(3).getImm();
343               unsigned FeedReg1 =
344                 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
345               unsigned FeedReg2 =
346                 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
347 
348               if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
349                 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
350                                      "to splat/copy: ");
351                 LLVM_DEBUG(MI.dump());
352                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
353                         MI.getOperand(0).getReg())
354                     .add(MI.getOperand(1));
355                 ToErase = &MI;
356                 Simplified = true;
357               }
358 
359               // If this is a splat fed by a swap, we can simplify modify
360               // the splat to splat the other value from the swap's input
361               // parameter.
362               else if ((Immed == 0 || Immed == 3)
363                        && FeedImmed == 2 && FeedReg1 == FeedReg2) {
364                 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
365                 LLVM_DEBUG(MI.dump());
366                 MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
367                 MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
368                 MI.getOperand(3).setImm(3 - Immed);
369                 Simplified = true;
370               }
371 
372               // If this is a swap fed by a swap, we can replace it
373               // with a copy from the first swap's input.
374               else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
375                 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
376                 LLVM_DEBUG(MI.dump());
377                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
378                         MI.getOperand(0).getReg())
379                     .add(DefMI->getOperand(1));
380                 ToErase = &MI;
381                 Simplified = true;
382               }
383             } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
384                        (DefMI->getOperand(2).getImm() == 0 ||
385                         DefMI->getOperand(2).getImm() == 3)) {
386               // Splat fed by another splat - switch the output of the first
387               // and remove the second.
388               DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
389               ToErase = &MI;
390               Simplified = true;
391               LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
392               LLVM_DEBUG(MI.dump());
393             }
394           }
395         }
396         break;
397       }
398       case PPC::VSPLTB:
399       case PPC::VSPLTH:
400       case PPC::XXSPLTW: {
401         unsigned MyOpcode = MI.getOpcode();
402         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
403         unsigned TrueReg =
404           TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
405         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
406           break;
407         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
408         if (!DefMI)
409           break;
410         unsigned DefOpcode = DefMI->getOpcode();
411         auto isConvertOfSplat = [=]() -> bool {
412           if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
413             return false;
414           unsigned ConvReg = DefMI->getOperand(1).getReg();
415           if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
416             return false;
417           MachineInstr *Splt = MRI->getVRegDef(ConvReg);
418           return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
419             Splt->getOpcode() == PPC::XXSPLTW);
420         };
421         bool AlreadySplat = (MyOpcode == DefOpcode) ||
422           (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
423           (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
424           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
425           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
426           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
427           (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
428         // If the instruction[s] that feed this splat have already splat
429         // the value, this splat is redundant.
430         if (AlreadySplat) {
431           LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
432           LLVM_DEBUG(MI.dump());
433           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
434                   MI.getOperand(0).getReg())
435               .add(MI.getOperand(OpNo));
436           ToErase = &MI;
437           Simplified = true;
438         }
439         // Splat fed by a shift. Usually when we align value to splat into
440         // vector element zero.
441         if (DefOpcode == PPC::XXSLDWI) {
442           unsigned ShiftRes = DefMI->getOperand(0).getReg();
443           unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
444           unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
445           unsigned ShiftImm = DefMI->getOperand(3).getImm();
446           unsigned SplatImm = MI.getOperand(2).getImm();
447           if (ShiftOp1 == ShiftOp2) {
448             unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
449             if (MRI->hasOneNonDBGUse(ShiftRes)) {
450               LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
451               LLVM_DEBUG(DefMI->dump());
452               ToErase = DefMI;
453             }
454             Simplified = true;
455             LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
456                               << " to " << NewElem << " in instruction: ");
457             LLVM_DEBUG(MI.dump());
458             MI.getOperand(1).setReg(ShiftOp1);
459             MI.getOperand(2).setImm(NewElem);
460           }
461         }
462         break;
463       }
464       case PPC::XVCVDPSP: {
465         // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
466         unsigned TrueReg =
467           TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
468         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
469           break;
470         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
471 
472         // This can occur when building a vector of single precision or integer
473         // values.
474         if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
475           unsigned DefsReg1 =
476             TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
477           unsigned DefsReg2 =
478             TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
479           if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
480               !TargetRegisterInfo::isVirtualRegister(DefsReg2))
481             break;
482           MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
483           MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
484 
485           if (!P1 || !P2)
486             break;
487 
488           // Remove the passed FRSP instruction if it only feeds this MI and
489           // set any uses of that FRSP (in this MI) to the source of the FRSP.
490           auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
491             if (RoundInstr->getOpcode() == PPC::FRSP &&
492                 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
493               Simplified = true;
494               unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
495               unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
496               MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
497               for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
498                 if (Use.getOperand(i).isReg() &&
499                     Use.getOperand(i).getReg() == FRSPDefines)
500                   Use.getOperand(i).setReg(ConvReg1);
501               LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
502               LLVM_DEBUG(RoundInstr->dump());
503               LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
504               LLVM_DEBUG(MI.dump());
505               LLVM_DEBUG(dbgs() << "Through instruction:\n");
506               LLVM_DEBUG(DefMI->dump());
507               RoundInstr->eraseFromParent();
508             }
509           };
510 
511           // If the input to XVCVDPSP is a vector that was built (even
512           // partially) out of FRSP's, the FRSP(s) can safely be removed
513           // since this instruction performs the same operation.
514           if (P1 != P2) {
515             removeFRSPIfPossible(P1);
516             removeFRSPIfPossible(P2);
517             break;
518           }
519           removeFRSPIfPossible(P1);
520         }
521         break;
522       }
523       case PPC::EXTSH:
524       case PPC::EXTSH8:
525       case PPC::EXTSH8_32_64: {
526         if (!EnableSExtElimination) break;
527         unsigned NarrowReg = MI.getOperand(1).getReg();
528         if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
529           break;
530 
531         MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
532         // If we've used a zero-extending load that we will sign-extend,
533         // just do a sign-extending load.
534         if (SrcMI->getOpcode() == PPC::LHZ ||
535             SrcMI->getOpcode() == PPC::LHZX) {
536           if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
537             break;
538           auto is64Bit = [] (unsigned Opcode) {
539             return Opcode == PPC::EXTSH8;
540           };
541           auto isXForm = [] (unsigned Opcode) {
542             return Opcode == PPC::LHZX;
543           };
544           auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
545             if (is64Bit)
546               if (isXForm) return PPC::LHAX8;
547               else         return PPC::LHA8;
548             else
549               if (isXForm) return PPC::LHAX;
550               else         return PPC::LHA;
551           };
552           unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
553                                        isXForm(SrcMI->getOpcode()));
554           LLVM_DEBUG(dbgs() << "Zero-extending load\n");
555           LLVM_DEBUG(SrcMI->dump());
556           LLVM_DEBUG(dbgs() << "and sign-extension\n");
557           LLVM_DEBUG(MI.dump());
558           LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
559           SrcMI->setDesc(TII->get(Opc));
560           SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
561           ToErase = &MI;
562           Simplified = true;
563           NumEliminatedSExt++;
564         }
565         break;
566       }
567       case PPC::EXTSW:
568       case PPC::EXTSW_32:
569       case PPC::EXTSW_32_64: {
570         if (!EnableSExtElimination) break;
571         unsigned NarrowReg = MI.getOperand(1).getReg();
572         if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
573           break;
574 
575         MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
576         // If we've used a zero-extending load that we will sign-extend,
577         // just do a sign-extending load.
578         if (SrcMI->getOpcode() == PPC::LWZ ||
579             SrcMI->getOpcode() == PPC::LWZX) {
580           if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
581             break;
582           auto is64Bit = [] (unsigned Opcode) {
583             return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
584           };
585           auto isXForm = [] (unsigned Opcode) {
586             return Opcode == PPC::LWZX;
587           };
588           auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
589             if (is64Bit)
590               if (isXForm) return PPC::LWAX;
591               else         return PPC::LWA;
592             else
593               if (isXForm) return PPC::LWAX_32;
594               else         return PPC::LWA_32;
595           };
596           unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
597                                        isXForm(SrcMI->getOpcode()));
598           LLVM_DEBUG(dbgs() << "Zero-extending load\n");
599           LLVM_DEBUG(SrcMI->dump());
600           LLVM_DEBUG(dbgs() << "and sign-extension\n");
601           LLVM_DEBUG(MI.dump());
602           LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
603           SrcMI->setDesc(TII->get(Opc));
604           SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
605           ToErase = &MI;
606           Simplified = true;
607           NumEliminatedSExt++;
608         } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
609                    TII->isSignExtended(*SrcMI)) {
610           // We can eliminate EXTSW if the input is known to be already
611           // sign-extended.
612           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
613           unsigned TmpReg =
614             MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
615           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
616                   TmpReg);
617           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
618                   MI.getOperand(0).getReg())
619               .addReg(TmpReg)
620               .addReg(NarrowReg)
621               .addImm(PPC::sub_32);
622           ToErase = &MI;
623           Simplified = true;
624           NumEliminatedSExt++;
625         }
626         break;
627       }
628       case PPC::RLDICL: {
629         // We can eliminate RLDICL (e.g. for zero-extension)
630         // if all bits to clear are already zero in the input.
631         // This code assume following code sequence for zero-extension.
632         //   %6 = COPY %5:sub_32; (optional)
633         //   %8 = IMPLICIT_DEF;
634         //   %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
635         if (!EnableZExtElimination) break;
636 
637         if (MI.getOperand(2).getImm() != 0)
638           break;
639 
640         unsigned SrcReg = MI.getOperand(1).getReg();
641         if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
642           break;
643 
644         MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
645         if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
646               SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
647           break;
648 
649         MachineInstr *ImpDefMI, *SubRegMI;
650         ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
651         SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
652         if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
653 
654         SrcMI = SubRegMI;
655         if (SubRegMI->getOpcode() == PPC::COPY) {
656           unsigned CopyReg = SubRegMI->getOperand(1).getReg();
657           if (TargetRegisterInfo::isVirtualRegister(CopyReg))
658             SrcMI = MRI->getVRegDef(CopyReg);
659         }
660 
661         unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
662         if (MI.getOperand(3).getImm() <= KnownZeroCount) {
663           LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
664           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
665                   MI.getOperand(0).getReg())
666               .addReg(SrcReg);
667           ToErase = &MI;
668           Simplified = true;
669           NumEliminatedZExt++;
670         }
671         break;
672       }
673 
674       // TODO: Any instruction that has an immediate form fed only by a PHI
675       // whose operands are all load immediate can be folded away. We currently
676       // do this for ADD instructions, but should expand it to arithmetic and
677       // binary instructions with immediate forms in the future.
678       case PPC::ADD4:
679       case PPC::ADD8: {
680         auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
681           assert(PhiOp && "Invalid Operand!");
682           MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
683 
684           return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
685                  MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
686         };
687 
688         auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
689                                             MachineOperand *PhiOp) {
690           assert(PhiOp && "Invalid Operand!");
691           assert(DominatorOp && "Invalid Operand!");
692           MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
693           MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
694 
695           // Note: the vregs only show up at odd indices position of PHI Node,
696           // the even indices position save the BB info.
697           for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
698             MachineInstr *LiMI =
699                 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
700             if (!LiMI ||
701                 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
702                 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
703                 !MDT->dominates(DefDomMI, LiMI))
704               return false;
705           }
706 
707           return true;
708         };
709 
710         MachineOperand Op1 = MI.getOperand(1);
711         MachineOperand Op2 = MI.getOperand(2);
712         if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
713           std::swap(Op1, Op2);
714         else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
715           break; // We don't have an ADD fed by LI's that can be transformed
716 
717         // Now we know that Op1 is the PHI node and Op2 is the dominator
718         unsigned DominatorReg = Op2.getReg();
719 
720         const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
721                                              ? &PPC::G8RC_and_G8RC_NOX0RegClass
722                                              : &PPC::GPRC_and_GPRC_NOR0RegClass;
723         MRI->setRegClass(DominatorReg, TRC);
724 
725         // replace LIs with ADDIs
726         MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
727         for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
728           MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
729           LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
730           LLVM_DEBUG(LiMI->dump());
731 
732           // There could be repeated registers in the PHI, e.g: %1 =
733           // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
734           // already replaced the def instruction, skip.
735           if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
736             continue;
737 
738           assert((LiMI->getOpcode() == PPC::LI ||
739                   LiMI->getOpcode() == PPC::LI8) &&
740                  "Invalid Opcode!");
741           auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
742           LiMI->RemoveOperand(1);                    // remove the imm of LI
743           LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
744                                                               : PPC::ADDI8));
745           MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
746               .addReg(DominatorReg)
747               .addImm(LiImm); // restore the imm of LI
748           LLVM_DEBUG(LiMI->dump());
749         }
750 
751         // Replace ADD with COPY
752         LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
753         LLVM_DEBUG(MI.dump());
754         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
755                 MI.getOperand(0).getReg())
756             .add(Op1);
757         ToErase = &MI;
758         Simplified = true;
759         NumOptADDLIs++;
760         break;
761       }
762       case PPC::RLDICR: {
763         // We miss the opportunity to emit an RLDIC when lowering jump tables
764         // since ISEL sees only a single basic block. When selecting, the clear
765         // and shift left will be in different blocks.
766         unsigned SrcReg = MI.getOperand(1).getReg();
767         if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
768           break;
769 
770         MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
771         if (SrcMI->getOpcode() != PPC::RLDICL)
772           break;
773         MachineOperand MOpSHSrc = SrcMI->getOperand(2);
774         MachineOperand MOpMBSrc = SrcMI->getOperand(3);
775         MachineOperand MOpSHMI = MI.getOperand(2);
776         MachineOperand MOpMEMI = MI.getOperand(3);
777         if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() &&
778               MOpSHMI.isImm() && MOpMEMI.isImm()))
779           break;
780         uint64_t SHSrc = MOpSHSrc.getImm();
781         uint64_t MBSrc = MOpMBSrc.getImm();
782         uint64_t SHMI = MOpSHMI.getImm();
783         uint64_t MEMI = MOpMEMI.getImm();
784         uint64_t NewSH = SHSrc + SHMI;
785         uint64_t NewMB = MBSrc - SHMI;
786         if (NewMB > 63 || NewSH > 63)
787           break;
788 
789         // The bits cleared with RLDICL are [0, MBSrc).
790         // The bits cleared with RLDICR are (MEMI, 63].
791         // After the sequence, the bits cleared are:
792         // [0, MBSrc-SHMI) and (MEMI, 63).
793         //
794         // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
795         if ((63 - NewSH) != MEMI)
796           break;
797 
798         LLVM_DEBUG(dbgs() << "Converting pair: ");
799         LLVM_DEBUG(SrcMI->dump());
800         LLVM_DEBUG(MI.dump());
801 
802         MI.setDesc(TII->get(PPC::RLDIC));
803         MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
804         MI.getOperand(2).setImm(NewSH);
805         MI.getOperand(3).setImm(NewMB);
806 
807         LLVM_DEBUG(dbgs() << "To: ");
808         LLVM_DEBUG(MI.dump());
809         NumRotatesCollapsed++;
810         break;
811       }
812       }
813     }
814 
815     // If the last instruction was marked for elimination,
816     // remove it now.
817     if (ToErase) {
818       ToErase->eraseFromParent();
819       ToErase = nullptr;
820     }
821   }
822 
823   // Eliminate all the TOC save instructions which are redundant.
824   Simplified |= eliminateRedundantTOCSaves(TOCSaves);
825   // We try to eliminate redundant compare instruction.
826   Simplified |= eliminateRedundantCompare();
827 
828   return Simplified;
829 }
830 
831 // helper functions for eliminateRedundantCompare
832 static bool isEqOrNe(MachineInstr *BI) {
833   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
834   unsigned PredCond = PPC::getPredicateCondition(Pred);
835   return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
836 }
837 
838 static bool isSupportedCmpOp(unsigned opCode) {
839   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
840           opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
841           opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
842           opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
843 }
844 
845 static bool is64bitCmpOp(unsigned opCode) {
846   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
847           opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
848 }
849 
850 static bool isSignedCmpOp(unsigned opCode) {
851   return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
852           opCode == PPC::CMPDI || opCode == PPC::CMPWI);
853 }
854 
855 static unsigned getSignedCmpOpCode(unsigned opCode) {
856   if (opCode == PPC::CMPLD)  return PPC::CMPD;
857   if (opCode == PPC::CMPLW)  return PPC::CMPW;
858   if (opCode == PPC::CMPLDI) return PPC::CMPDI;
859   if (opCode == PPC::CMPLWI) return PPC::CMPWI;
860   return opCode;
861 }
862 
863 // We can decrement immediate x in (GE x) by changing it to (GT x-1) or
864 // (LT x) to (LE x-1)
865 static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
866   uint64_t Imm = CMPI->getOperand(2).getImm();
867   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
868   if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
869     return 0;
870 
871   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
872   unsigned PredCond = PPC::getPredicateCondition(Pred);
873   unsigned PredHint = PPC::getPredicateHint(Pred);
874   if (PredCond == PPC::PRED_GE)
875     return PPC::getPredicate(PPC::PRED_GT, PredHint);
876   if (PredCond == PPC::PRED_LT)
877     return PPC::getPredicate(PPC::PRED_LE, PredHint);
878 
879   return 0;
880 }
881 
882 // We can increment immediate x in (GT x) by changing it to (GE x+1) or
883 // (LE x) to (LT x+1)
884 static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
885   uint64_t Imm = CMPI->getOperand(2).getImm();
886   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
887   if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
888     return 0;
889 
890   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
891   unsigned PredCond = PPC::getPredicateCondition(Pred);
892   unsigned PredHint = PPC::getPredicateHint(Pred);
893   if (PredCond == PPC::PRED_GT)
894     return PPC::getPredicate(PPC::PRED_GE, PredHint);
895   if (PredCond == PPC::PRED_LE)
896     return PPC::getPredicate(PPC::PRED_LT, PredHint);
897 
898   return 0;
899 }
900 
901 // This takes a Phi node and returns a register value for the specified BB.
902 static unsigned getIncomingRegForBlock(MachineInstr *Phi,
903                                        MachineBasicBlock *MBB) {
904   for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
905     MachineOperand &MO = Phi->getOperand(I);
906     if (MO.getMBB() == MBB)
907       return Phi->getOperand(I-1).getReg();
908   }
909   llvm_unreachable("invalid src basic block for this Phi node\n");
910   return 0;
911 }
912 
913 // This function tracks the source of the register through register copy.
914 // If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
915 // assuming that the control comes from BB1 into BB2.
916 static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
917                            MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
918   unsigned SrcReg = Reg;
919   while (1) {
920     unsigned NextReg = SrcReg;
921     MachineInstr *Inst = MRI->getVRegDef(SrcReg);
922     if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
923       NextReg = getIncomingRegForBlock(Inst, BB1);
924       // We track through PHI only once to avoid infinite loop.
925       BB1 = nullptr;
926     }
927     else if (Inst->isFullCopy())
928       NextReg = Inst->getOperand(1).getReg();
929     if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
930       break;
931     SrcReg = NextReg;
932   }
933   return SrcReg;
934 }
935 
// Decide whether the compare feeding the conditional branch at the end of
// MBB can be merged with a compare in a predecessor block.
//
// On success, PredMBB is set to the predecessor whose compare survives the
// merge.  MBBtoMoveCmp is set to nullptr for the fully redundant case, or,
// for the partially redundant case, to the second predecessor into which
// MBB's compare must be moved.  Returns false if no opportunity exists.
static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  // A BB qualifies when it ends in a BCC on a virtual CR register that is
  // defined by a compare in the same BB and has no other non-debug use.
  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      unsigned CndReg = (*BII).getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  // If this BB has more than one successor, we can create a new BB and
  // move the compare instruction in the new BB.
  // So far, we do not move compare instruction to a BB having multiple
  // successors to avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    // Fully redundant case: the single predecessor also ends in an
    // eligible compare-and-branch pair.
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Hence we can proceed as is.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(Pred1MBB, Pred2MBB);
    }
    else return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}
1029 
1030 // This function will iterate over the input map containing a pair of TOC save
1031 // instruction and a flag. The flag will be set to false if the TOC save is
1032 // proven redundant. This function will erase from the basic block all the TOC
1033 // saves marked as redundant.
1034 bool PPCMIPeephole::eliminateRedundantTOCSaves(
1035     std::map<MachineInstr *, bool> &TOCSaves) {
1036   bool Simplified = false;
1037   int NumKept = 0;
1038   for (auto TOCSave : TOCSaves) {
1039     if (!TOCSave.second) {
1040       TOCSave.first->eraseFromParent();
1041       RemoveTOCSave++;
1042       Simplified = true;
1043     } else {
1044       NumKept++;
1045     }
1046   }
1047 
1048   if (NumKept > 1)
1049     MultiTOCSaves++;
1050 
1051   return Simplified;
1052 }
1053 
1054 // If multiple conditional branches are executed based on the (essentially)
1055 // same comparison, we merge compare instructions into one and make multiple
1056 // conditional branches on this comparison.
1057 // For example,
1058 //   if (a == 0) { ... }
1059 //   else if (a < 0) { ... }
1060 // can be executed by one compare and two conditional branches instead of
1061 // two pairs of a compare and a conditional branch.
1062 //
1063 // This method merges two compare instructions in two MBBs and modifies the
1064 // compare and conditional branch instructions if needed.
1065 // For the above example, the input for this pass looks like:
1066 //   cmplwi r3, 0
1067 //   beq    0, .LBB0_3
1068 //   cmpwi  r3, -1
1069 //   bgt    0, .LBB0_4
1070 // So, before merging two compares, we need to modify these instructions as
1071 //   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
1072 //   beq    0, .LBB0_3
//   cmpwi  r3, 0       ; greater than -1 means greater or equal to 0
1074 //   bge    0, .LBB0_4
1075 
bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as a operand in both MBBs.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As partially redundant case, we additionally handle if MBB2 has one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //      |    \                     (w/ 1 succ) \     |    \
    //      |     \                                 \    |     \
    //      |                                        \   |
    //   --------                                     --------
    //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                         -------- and 2 succ)
    //      |    \                                       |    \
    //      |     \                                      |     \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    // The branch/compare pairs at the end of MBB1 and MBB2.
    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    // Rewrites needed to make the two compares identical: new compare
    // opcode, new branch predicates, and adjusted immediates.
    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.

      // We cannot change opcode when comparing against an immediate
      // if the most significant bit of the immediate is one
      // due to the difference in sign extension.
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
        if (!I->getOperand(2).isImm())
          return false;
        int16_t Imm = (int16_t)I->getOperand(2).getImm();
        return Imm < 0;
      };

      if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in another compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If immediate are not same, we try to adjust by changing predicate;
      // e.g. GT imm means GE (imm+1).
      // Each unit of difference consumes one inc/dec predicate rewrite,
      // so at most a difference of 2 (one per compare) can be bridged.
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(CMPI2->dump());
    LLVM_DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle partially redundant case.
      if (SwapOperands) {
        unsigned Op1 = CMPI2->getOperand(1).getReg();
        unsigned Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      // Rewrite any operand that was defined by a PHI in MBB2 to use the
      // value incoming from MBBtoMoveCmp, since the compare moves there.
      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      // Move CMPI2 into MBBtoMoveCmp, just before its terminator.
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      // BI2 now needs a condition register merged from both predecessors:
      // CMPI1's result from MBB1 and the moved CMPI2's result from
      // MBBtoMoveCmp.  Build a PHI in MBB2 for it.
      DebugLoc DL = CMPI2->getDebugLoc();
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    // The surviving condition register is now last read by BI2, so the
    // kill flag moves from BI1 to BI2.
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
                        << printMBBReference(*MBBtoMoveCmp)
                        << " to handle partial redundancy.\n");
      LLVM_DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}
1328 
} // end anonymous namespace
1330 
// Register the pass with LLVM's pass registry under DEBUG_TYPE
// ("ppc-mi-peepholes") so it can be named on the command line and
// scheduled by the PowerPC target.
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)
1335 
1336 char PPCMIPeephole::ID = 0;
1337 FunctionPass*
1338 llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
1339 
1340