xref: /llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (revision 174b4ff781a56b98d8f43454fbda834c5dab3cc2)
1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This pass performs peephole optimizations to clean up ugly code
10 // sequences at the MachineInstruction layer.  It runs at the end of
11 // the SSA phases, following VSX swap removal.  A pass of dead code
12 // elimination follows this one for quick clean-up of any dead
13 // instructions introduced here.  Although we could do this as callbacks
14 // from the generic peephole pass, this would have a couple of bad
15 // effects:  it might remove optimization opportunities for VSX swap
16 // removal, and it would miss cleanups made possible following VSX
17 // swap removal.
18 //
19 //===---------------------------------------------------------------------===//
20 
21 #include "PPC.h"
22 #include "PPCInstrBuilder.h"
23 #include "PPCInstrInfo.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/Support/Debug.h"
31 #include "MCTargetDesc/PPCPredicates.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "ppc-mi-peepholes"
36 
// Pass-wide statistics, reported with -stats.
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");
STATISTIC(NumRotatesCollapsed,
          "Number of pairs of rotate left, clear left/right collapsed");

// Keep re-running the reg-reg -> reg-imm conversion until no instruction
// changes (on by default; see the FIXME in simplifyCode about its cost).
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

// Master switch for converting reg+reg instructions fed by load-immediates
// into their reg+imm forms (on by default).
static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

// Elimination of redundant sign-extensions (EXTSH/EXTSW peepholes); off by
// default.
static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

// Elimination of redundant zero-extensions (RLDICL peephole); off by
// default.
static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);
72 namespace {
73 
74 struct PPCMIPeephole : public MachineFunctionPass {
75 
76   static char ID;
77   const PPCInstrInfo *TII;
78   MachineFunction *MF;
79   MachineRegisterInfo *MRI;
80 
81   PPCMIPeephole() : MachineFunctionPass(ID) {
82     initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
83   }
84 
85 private:
86   MachineDominatorTree *MDT;
87 
88   // Initialize class variables.
89   void initialize(MachineFunction &MFParm);
90 
91   // Perform peepholes.
92   bool simplifyCode(void);
93 
94   // Perform peepholes.
95   bool eliminateRedundantCompare(void);
96   bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
97   void emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
98   void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
99                       MachineInstr *MI);
100 
101 public:
102 
103   void getAnalysisUsage(AnalysisUsage &AU) const override {
104     AU.addRequired<MachineDominatorTree>();
105     AU.addPreserved<MachineDominatorTree>();
106     MachineFunctionPass::getAnalysisUsage(AU);
107   }
108 
109   // Main entry point for this pass.
110   bool runOnMachineFunction(MachineFunction &MF) override {
111     if (skipFunction(MF.getFunction()))
112       return false;
113     initialize(MF);
114     return simplifyCode();
115   }
116 };
117 
118 // Initialize class variables.
119 void PPCMIPeephole::initialize(MachineFunction &MFParm) {
120   MF = &MFParm;
121   MRI = &MF->getRegInfo();
122   MDT = &getAnalysis<MachineDominatorTree>();
123   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
124   LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
125   LLVM_DEBUG(MF->dump());
126 }
127 
128 static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
129                                       MachineRegisterInfo *MRI) {
130   assert(Op && "Invalid Operand!");
131   if (!Op->isReg())
132     return nullptr;
133 
134   unsigned Reg = Op->getReg();
135   if (!TargetRegisterInfo::isVirtualRegister(Reg))
136     return nullptr;
137 
138   return MRI->getVRegDef(Reg);
139 }
140 
141 // This function returns number of known zero bits in output of MI
142 // starting from the most significant bit.
143 static unsigned
144 getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
145   unsigned Opcode = MI->getOpcode();
146   if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
147       Opcode == PPC::RLDCL  || Opcode == PPC::RLDCLo)
148     return MI->getOperand(3).getImm();
149 
150   if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
151        MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
152     return MI->getOperand(3).getImm();
153 
154   if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINMo ||
155        Opcode == PPC::RLWNM   || Opcode == PPC::RLWNMo  ||
156        Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
157        MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
158     return 32 + MI->getOperand(3).getImm();
159 
160   if (Opcode == PPC::ANDIo) {
161     uint16_t Imm = MI->getOperand(2).getImm();
162     return 48 + countLeadingZeros(Imm);
163   }
164 
165   if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZWo ||
166       Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZWo ||
167       Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
168     // The result ranges from 0 to 32.
169     return 58;
170 
171   if (Opcode == PPC::CNTLZD  || Opcode == PPC::CNTLZDo ||
172       Opcode == PPC::CNTTZD  || Opcode == PPC::CNTTZDo)
173     // The result ranges from 0 to 64.
174     return 57;
175 
176   if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
177       Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
178       Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
179       Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
180     return 48;
181 
182   if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
183       Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
184       Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
185       Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
186     return 56;
187 
188   if (TII->isZeroExtended(*MI))
189     return 32;
190 
191   return 0;
192 }
193 
// This function maintains a map for the pairs <TOC Save Instr, Keep>.
// Each time a new TOC save is encountered, it checks if any of the existing
// ones are dominated by the new one. If so, it marks the existing one as
// redundant by setting its entry in the map to false. It then adds the new
// instruction to the map with either true or false depending on whether any
// existing instruction dominated the new one.
200 void PPCMIPeephole::UpdateTOCSaves(
201   std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
202   assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
203   bool Keep = true;
204   for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
205     MachineInstr *CurrInst = It->first;
206     // If new instruction dominates an existing one, mark existing one as
207     // redundant.
208     if (It->second && MDT->dominates(MI, CurrInst))
209       It->second = false;
210     // Check if the new instruction is redundant.
211     if (MDT->dominates(CurrInst, MI)) {
212       Keep = false;
213       break;
214     }
215   }
216   // Add new instruction to map.
217   TOCSaves[MI] = Keep;
218 }
219 
220 // Perform peephole optimizations.
221 bool PPCMIPeephole::simplifyCode(void) {
222   bool Simplified = false;
223   MachineInstr* ToErase = nullptr;
224   std::map<MachineInstr *, bool> TOCSaves;
225   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
226   NumFunctionsEnteredInMIPeephole++;
227   if (ConvertRegReg) {
228     // Fixed-point conversion of reg/reg instructions fed by load-immediate
229     // into reg/imm instructions. FIXME: This is expensive, control it with
230     // an option.
231     bool SomethingChanged = false;
232     do {
233       NumFixedPointIterations++;
234       SomethingChanged = false;
235       for (MachineBasicBlock &MBB : *MF) {
236         for (MachineInstr &MI : MBB) {
237           if (MI.isDebugInstr())
238             continue;
239 
240           if (TII->convertToImmediateForm(MI)) {
241             // We don't erase anything in case the def has other uses. Let DCE
242             // remove it if it can be removed.
243             LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
244             LLVM_DEBUG(MI.dump());
245             NumConvertedToImmediateForm++;
246             SomethingChanged = true;
247             Simplified = true;
248             continue;
249           }
250         }
251       }
252     } while (SomethingChanged && FixedPointRegToImm);
253   }
254 
255   for (MachineBasicBlock &MBB : *MF) {
256     for (MachineInstr &MI : MBB) {
257 
258       // If the previous instruction was marked for elimination,
259       // remove it now.
260       if (ToErase) {
261         ToErase->eraseFromParent();
262         ToErase = nullptr;
263       }
264 
265       // Ignore debug instructions.
266       if (MI.isDebugInstr())
267         continue;
268 
269       // Per-opcode peepholes.
270       switch (MI.getOpcode()) {
271 
272       default:
273         break;
274 
275       case PPC::STD: {
276         MachineFrameInfo &MFI = MF->getFrameInfo();
277         if (MFI.hasVarSizedObjects() ||
278             !MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
279           break;
280         // When encountering a TOC save instruction, call UpdateTOCSaves
281         // to add it to the TOCSaves map and mark any existing TOC saves
282         // it dominates as redundant.
283         if (TII->isTOCSaveMI(MI))
284           UpdateTOCSaves(TOCSaves, &MI);
285         break;
286       }
287       case PPC::XXPERMDI: {
288         // Perform simplifications of 2x64 vector swaps and splats.
289         // A swap is identified by an immediate value of 2, and a splat
290         // is identified by an immediate value of 0 or 3.
291         int Immed = MI.getOperand(3).getImm();
292 
293         if (Immed != 1) {
294 
295           // For each of these simplifications, we need the two source
296           // regs to match.  Unfortunately, MachineCSE ignores COPY and
297           // SUBREG_TO_REG, so for example we can see
298           //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
299           // We have to look through chains of COPY and SUBREG_TO_REG
300           // to find the real source values for comparison.
301           unsigned TrueReg1 =
302             TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
303           unsigned TrueReg2 =
304             TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
305 
306           if (TrueReg1 == TrueReg2
307               && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
308             MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
309             unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
310 
311             // If this is a splat fed by a splatting load, the splat is
312             // redundant. Replace with a copy. This doesn't happen directly due
313             // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
314             // a load of a double to a vector of 64-bit integers.
315             auto isConversionOfLoadAndSplat = [=]() -> bool {
316               if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
317                 return false;
318               unsigned DefReg =
319                 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
320               if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
321                 MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
322                 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
323                   return true;
324               }
325               return false;
326             };
327             if (DefMI && (Immed == 0 || Immed == 3)) {
328               if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
329                 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
330                                      "to load-and-splat/copy: ");
331                 LLVM_DEBUG(MI.dump());
332                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
333                         MI.getOperand(0).getReg())
334                     .add(MI.getOperand(1));
335                 ToErase = &MI;
336                 Simplified = true;
337               }
338             }
339 
340             // If this is a splat or a swap fed by another splat, we
341             // can replace it with a copy.
342             if (DefOpc == PPC::XXPERMDI) {
343               unsigned FeedImmed = DefMI->getOperand(3).getImm();
344               unsigned FeedReg1 =
345                 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
346               unsigned FeedReg2 =
347                 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
348 
349               if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
350                 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
351                                      "to splat/copy: ");
352                 LLVM_DEBUG(MI.dump());
353                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
354                         MI.getOperand(0).getReg())
355                     .add(MI.getOperand(1));
356                 ToErase = &MI;
357                 Simplified = true;
358               }
359 
              // If this is a splat fed by a swap, we can modify the splat
              // to splat the other value from the swap's input parameter.
363               else if ((Immed == 0 || Immed == 3)
364                        && FeedImmed == 2 && FeedReg1 == FeedReg2) {
365                 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
366                 LLVM_DEBUG(MI.dump());
367                 MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
368                 MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
369                 MI.getOperand(3).setImm(3 - Immed);
370                 Simplified = true;
371               }
372 
373               // If this is a swap fed by a swap, we can replace it
374               // with a copy from the first swap's input.
375               else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
376                 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
377                 LLVM_DEBUG(MI.dump());
378                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
379                         MI.getOperand(0).getReg())
380                     .add(DefMI->getOperand(1));
381                 ToErase = &MI;
382                 Simplified = true;
383               }
384             } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
385                        (DefMI->getOperand(2).getImm() == 0 ||
386                         DefMI->getOperand(2).getImm() == 3)) {
387               // Splat fed by another splat - switch the output of the first
388               // and remove the second.
389               DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
390               ToErase = &MI;
391               Simplified = true;
392               LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
393               LLVM_DEBUG(MI.dump());
394             }
395           }
396         }
397         break;
398       }
399       case PPC::VSPLTB:
400       case PPC::VSPLTH:
401       case PPC::XXSPLTW: {
402         unsigned MyOpcode = MI.getOpcode();
403         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
404         unsigned TrueReg =
405           TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
406         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
407           break;
408         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
409         if (!DefMI)
410           break;
411         unsigned DefOpcode = DefMI->getOpcode();
412         auto isConvertOfSplat = [=]() -> bool {
413           if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
414             return false;
415           unsigned ConvReg = DefMI->getOperand(1).getReg();
416           if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
417             return false;
418           MachineInstr *Splt = MRI->getVRegDef(ConvReg);
419           return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
420             Splt->getOpcode() == PPC::XXSPLTW);
421         };
422         bool AlreadySplat = (MyOpcode == DefOpcode) ||
423           (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
424           (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
425           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
426           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
427           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
428           (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
429         // If the instruction[s] that feed this splat have already splat
430         // the value, this splat is redundant.
431         if (AlreadySplat) {
432           LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
433           LLVM_DEBUG(MI.dump());
434           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
435                   MI.getOperand(0).getReg())
436               .add(MI.getOperand(OpNo));
437           ToErase = &MI;
438           Simplified = true;
439         }
440         // Splat fed by a shift. Usually when we align value to splat into
441         // vector element zero.
442         if (DefOpcode == PPC::XXSLDWI) {
443           unsigned ShiftRes = DefMI->getOperand(0).getReg();
444           unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
445           unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
446           unsigned ShiftImm = DefMI->getOperand(3).getImm();
447           unsigned SplatImm = MI.getOperand(2).getImm();
448           if (ShiftOp1 == ShiftOp2) {
449             unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
450             if (MRI->hasOneNonDBGUse(ShiftRes)) {
451               LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
452               LLVM_DEBUG(DefMI->dump());
453               ToErase = DefMI;
454             }
455             Simplified = true;
456             LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
457                               << " to " << NewElem << " in instruction: ");
458             LLVM_DEBUG(MI.dump());
459             MI.getOperand(1).setReg(ShiftOp1);
460             MI.getOperand(2).setImm(NewElem);
461           }
462         }
463         break;
464       }
465       case PPC::XVCVDPSP: {
466         // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
467         unsigned TrueReg =
468           TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
469         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
470           break;
471         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
472 
473         // This can occur when building a vector of single precision or integer
474         // values.
475         if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
476           unsigned DefsReg1 =
477             TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
478           unsigned DefsReg2 =
479             TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
480           if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
481               !TargetRegisterInfo::isVirtualRegister(DefsReg2))
482             break;
483           MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
484           MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
485 
486           if (!P1 || !P2)
487             break;
488 
489           // Remove the passed FRSP instruction if it only feeds this MI and
490           // set any uses of that FRSP (in this MI) to the source of the FRSP.
491           auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
492             if (RoundInstr->getOpcode() == PPC::FRSP &&
493                 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
494               Simplified = true;
495               unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
496               unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
497               MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
498               for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
499                 if (Use.getOperand(i).isReg() &&
500                     Use.getOperand(i).getReg() == FRSPDefines)
501                   Use.getOperand(i).setReg(ConvReg1);
502               LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
503               LLVM_DEBUG(RoundInstr->dump());
504               LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
505               LLVM_DEBUG(MI.dump());
506               LLVM_DEBUG(dbgs() << "Through instruction:\n");
507               LLVM_DEBUG(DefMI->dump());
508               RoundInstr->eraseFromParent();
509             }
510           };
511 
512           // If the input to XVCVDPSP is a vector that was built (even
513           // partially) out of FRSP's, the FRSP(s) can safely be removed
514           // since this instruction performs the same operation.
515           if (P1 != P2) {
516             removeFRSPIfPossible(P1);
517             removeFRSPIfPossible(P2);
518             break;
519           }
520           removeFRSPIfPossible(P1);
521         }
522         break;
523       }
524       case PPC::EXTSH:
525       case PPC::EXTSH8:
526       case PPC::EXTSH8_32_64: {
527         if (!EnableSExtElimination) break;
528         unsigned NarrowReg = MI.getOperand(1).getReg();
529         if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
530           break;
531 
532         MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
533         // If we've used a zero-extending load that we will sign-extend,
534         // just do a sign-extending load.
535         if (SrcMI->getOpcode() == PPC::LHZ ||
536             SrcMI->getOpcode() == PPC::LHZX) {
537           if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
538             break;
539           auto is64Bit = [] (unsigned Opcode) {
540             return Opcode == PPC::EXTSH8;
541           };
542           auto isXForm = [] (unsigned Opcode) {
543             return Opcode == PPC::LHZX;
544           };
545           auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
546             if (is64Bit)
547               if (isXForm) return PPC::LHAX8;
548               else         return PPC::LHA8;
549             else
550               if (isXForm) return PPC::LHAX;
551               else         return PPC::LHA;
552           };
553           unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
554                                        isXForm(SrcMI->getOpcode()));
555           LLVM_DEBUG(dbgs() << "Zero-extending load\n");
556           LLVM_DEBUG(SrcMI->dump());
557           LLVM_DEBUG(dbgs() << "and sign-extension\n");
558           LLVM_DEBUG(MI.dump());
559           LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
560           SrcMI->setDesc(TII->get(Opc));
561           SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
562           ToErase = &MI;
563           Simplified = true;
564           NumEliminatedSExt++;
565         }
566         break;
567       }
568       case PPC::EXTSW:
569       case PPC::EXTSW_32:
570       case PPC::EXTSW_32_64: {
571         if (!EnableSExtElimination) break;
572         unsigned NarrowReg = MI.getOperand(1).getReg();
573         if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
574           break;
575 
576         MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
577         // If we've used a zero-extending load that we will sign-extend,
578         // just do a sign-extending load.
579         if (SrcMI->getOpcode() == PPC::LWZ ||
580             SrcMI->getOpcode() == PPC::LWZX) {
581           if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
582             break;
583           auto is64Bit = [] (unsigned Opcode) {
584             return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
585           };
586           auto isXForm = [] (unsigned Opcode) {
587             return Opcode == PPC::LWZX;
588           };
589           auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
590             if (is64Bit)
591               if (isXForm) return PPC::LWAX;
592               else         return PPC::LWA;
593             else
594               if (isXForm) return PPC::LWAX_32;
595               else         return PPC::LWA_32;
596           };
597           unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
598                                        isXForm(SrcMI->getOpcode()));
599           LLVM_DEBUG(dbgs() << "Zero-extending load\n");
600           LLVM_DEBUG(SrcMI->dump());
601           LLVM_DEBUG(dbgs() << "and sign-extension\n");
602           LLVM_DEBUG(MI.dump());
603           LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
604           SrcMI->setDesc(TII->get(Opc));
605           SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
606           ToErase = &MI;
607           Simplified = true;
608           NumEliminatedSExt++;
609         } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
610                    TII->isSignExtended(*SrcMI)) {
611           // We can eliminate EXTSW if the input is known to be already
612           // sign-extended.
613           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
614           unsigned TmpReg =
615             MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
616           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
617                   TmpReg);
618           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
619                   MI.getOperand(0).getReg())
620               .addReg(TmpReg)
621               .addReg(NarrowReg)
622               .addImm(PPC::sub_32);
623           ToErase = &MI;
624           Simplified = true;
625           NumEliminatedSExt++;
626         }
627         break;
628       }
629       case PPC::RLDICL: {
630         // We can eliminate RLDICL (e.g. for zero-extension)
631         // if all bits to clear are already zero in the input.
632         // This code assume following code sequence for zero-extension.
633         //   %6 = COPY %5:sub_32; (optional)
634         //   %8 = IMPLICIT_DEF;
635         //   %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
636         if (!EnableZExtElimination) break;
637 
638         if (MI.getOperand(2).getImm() != 0)
639           break;
640 
641         unsigned SrcReg = MI.getOperand(1).getReg();
642         if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
643           break;
644 
645         MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
646         if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
647               SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
648           break;
649 
650         MachineInstr *ImpDefMI, *SubRegMI;
651         ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
652         SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
653         if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
654 
655         SrcMI = SubRegMI;
656         if (SubRegMI->getOpcode() == PPC::COPY) {
657           unsigned CopyReg = SubRegMI->getOperand(1).getReg();
658           if (TargetRegisterInfo::isVirtualRegister(CopyReg))
659             SrcMI = MRI->getVRegDef(CopyReg);
660         }
661 
662         unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
663         if (MI.getOperand(3).getImm() <= KnownZeroCount) {
664           LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
665           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
666                   MI.getOperand(0).getReg())
667               .addReg(SrcReg);
668           ToErase = &MI;
669           Simplified = true;
670           NumEliminatedZExt++;
671         }
672         break;
673       }
674 
675       // TODO: Any instruction that has an immediate form fed only by a PHI
676       // whose operands are all load immediate can be folded away. We currently
677       // do this for ADD instructions, but should expand it to arithmetic and
678       // binary instructions with immediate forms in the future.
679       case PPC::ADD4:
680       case PPC::ADD8: {
681         auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
682           assert(PhiOp && "Invalid Operand!");
683           MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
684 
685           return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
686                  MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
687         };
688 
689         auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
690                                             MachineOperand *PhiOp) {
691           assert(PhiOp && "Invalid Operand!");
692           assert(DominatorOp && "Invalid Operand!");
693           MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
694           MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
695 
696           // Note: the vregs only show up at odd indices position of PHI Node,
697           // the even indices position save the BB info.
698           for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
699             MachineInstr *LiMI =
700                 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
701             if (!LiMI ||
702                 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
703                 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
704                 !MDT->dominates(DefDomMI, LiMI))
705               return false;
706           }
707 
708           return true;
709         };
710 
711         MachineOperand Op1 = MI.getOperand(1);
712         MachineOperand Op2 = MI.getOperand(2);
713         if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
714           std::swap(Op1, Op2);
715         else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
716           break; // We don't have an ADD fed by LI's that can be transformed
717 
718         // Now we know that Op1 is the PHI node and Op2 is the dominator
719         unsigned DominatorReg = Op2.getReg();
720 
721         const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
722                                              ? &PPC::G8RC_and_G8RC_NOX0RegClass
723                                              : &PPC::GPRC_and_GPRC_NOR0RegClass;
724         MRI->setRegClass(DominatorReg, TRC);
725 
726         // replace LIs with ADDIs
727         MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
728         for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
729           MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
730           LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
731           LLVM_DEBUG(LiMI->dump());
732 
733           // There could be repeated registers in the PHI, e.g: %1 =
734           // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
735           // already replaced the def instruction, skip.
736           if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
737             continue;
738 
739           assert((LiMI->getOpcode() == PPC::LI ||
740                   LiMI->getOpcode() == PPC::LI8) &&
741                  "Invalid Opcode!");
742           auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
743           LiMI->RemoveOperand(1);                    // remove the imm of LI
744           LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
745                                                               : PPC::ADDI8));
746           MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
747               .addReg(DominatorReg)
748               .addImm(LiImm); // restore the imm of LI
749           LLVM_DEBUG(LiMI->dump());
750         }
751 
752         // Replace ADD with COPY
753         LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
754         LLVM_DEBUG(MI.dump());
755         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
756                 MI.getOperand(0).getReg())
757             .add(Op1);
758         ToErase = &MI;
759         Simplified = true;
760         NumOptADDLIs++;
761         break;
762       }
763       case PPC::RLDICR: {
764         emitRLDICWhenLoweringJumpTables(MI);
765         break;
766       }
767       }
768     }
769 
770     // If the last instruction was marked for elimination,
771     // remove it now.
772     if (ToErase) {
773       ToErase->eraseFromParent();
774       ToErase = nullptr;
775     }
776   }
777 
778   // Eliminate all the TOC save instructions which are redundant.
779   Simplified |= eliminateRedundantTOCSaves(TOCSaves);
780   // We try to eliminate redundant compare instruction.
781   Simplified |= eliminateRedundantCompare();
782 
783   return Simplified;
784 }
785 
786 // helper functions for eliminateRedundantCompare
787 static bool isEqOrNe(MachineInstr *BI) {
788   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
789   unsigned PredCond = PPC::getPredicateCondition(Pred);
790   return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
791 }
792 
793 static bool isSupportedCmpOp(unsigned opCode) {
794   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
795           opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
796           opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
797           opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
798 }
799 
800 static bool is64bitCmpOp(unsigned opCode) {
801   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
802           opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
803 }
804 
805 static bool isSignedCmpOp(unsigned opCode) {
806   return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
807           opCode == PPC::CMPDI || opCode == PPC::CMPWI);
808 }
809 
810 static unsigned getSignedCmpOpCode(unsigned opCode) {
811   if (opCode == PPC::CMPLD)  return PPC::CMPD;
812   if (opCode == PPC::CMPLW)  return PPC::CMPW;
813   if (opCode == PPC::CMPLDI) return PPC::CMPDI;
814   if (opCode == PPC::CMPLWI) return PPC::CMPWI;
815   return opCode;
816 }
817 
818 // We can decrement immediate x in (GE x) by changing it to (GT x-1) or
819 // (LT x) to (LE x-1)
820 static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
821   uint64_t Imm = CMPI->getOperand(2).getImm();
822   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
823   if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
824     return 0;
825 
826   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
827   unsigned PredCond = PPC::getPredicateCondition(Pred);
828   unsigned PredHint = PPC::getPredicateHint(Pred);
829   if (PredCond == PPC::PRED_GE)
830     return PPC::getPredicate(PPC::PRED_GT, PredHint);
831   if (PredCond == PPC::PRED_LT)
832     return PPC::getPredicate(PPC::PRED_LE, PredHint);
833 
834   return 0;
835 }
836 
837 // We can increment immediate x in (GT x) by changing it to (GE x+1) or
838 // (LE x) to (LT x+1)
839 static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
840   uint64_t Imm = CMPI->getOperand(2).getImm();
841   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
842   if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
843     return 0;
844 
845   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
846   unsigned PredCond = PPC::getPredicateCondition(Pred);
847   unsigned PredHint = PPC::getPredicateHint(Pred);
848   if (PredCond == PPC::PRED_GT)
849     return PPC::getPredicate(PPC::PRED_GE, PredHint);
850   if (PredCond == PPC::PRED_LE)
851     return PPC::getPredicate(PPC::PRED_LT, PredHint);
852 
853   return 0;
854 }
855 
856 // This takes a Phi node and returns a register value for the specified BB.
857 static unsigned getIncomingRegForBlock(MachineInstr *Phi,
858                                        MachineBasicBlock *MBB) {
859   for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
860     MachineOperand &MO = Phi->getOperand(I);
861     if (MO.getMBB() == MBB)
862       return Phi->getOperand(I-1).getReg();
863   }
864   llvm_unreachable("invalid src basic block for this Phi node\n");
865   return 0;
866 }
867 
868 // This function tracks the source of the register through register copy.
869 // If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
870 // assuming that the control comes from BB1 into BB2.
871 static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
872                            MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
873   unsigned SrcReg = Reg;
874   while (1) {
875     unsigned NextReg = SrcReg;
876     MachineInstr *Inst = MRI->getVRegDef(SrcReg);
877     if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
878       NextReg = getIncomingRegForBlock(Inst, BB1);
879       // We track through PHI only once to avoid infinite loop.
880       BB1 = nullptr;
881     }
882     else if (Inst->isFullCopy())
883       NextReg = Inst->getOperand(1).getReg();
884     if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
885       break;
886     SrcReg = NextReg;
887   }
888   return SrcReg;
889 }
890 
// Decide whether MBB's compare-and-branch can be merged with that of a
// predecessor. On success, PredMBB receives the predecessor whose compare
// will be kept; MBBtoMoveCmp is set to nullptr for the fully redundant
// case, or to the single-successor predecessor into which MBB's compare
// must be moved for the partially redundant case.
static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  // A block is eligible if it ends in a conditional branch whose CR input
  // is produced by a compare in the same block and has no other users.
  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      unsigned CndReg = (*BII).getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  // If this BB has more than one successor, we can create a new BB and
  // move the compare instruction in the new BB.
  // So far, we do not move compare instruction to a BB having multiple
  // successors to avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    // Fully redundant case: a single eligible predecessor.
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Hence we can proceed as is.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(Pred1MBB, Pred2MBB);
    }
    else return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
    // Check both source operands of MBB's compare.
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}
984 
985 // This function will iterate over the input map containing a pair of TOC save
986 // instruction and a flag. The flag will be set to false if the TOC save is
987 // proven redundant. This function will erase from the basic block all the TOC
988 // saves marked as redundant.
989 bool PPCMIPeephole::eliminateRedundantTOCSaves(
990     std::map<MachineInstr *, bool> &TOCSaves) {
991   bool Simplified = false;
992   int NumKept = 0;
993   for (auto TOCSave : TOCSaves) {
994     if (!TOCSave.second) {
995       TOCSave.first->eraseFromParent();
996       RemoveTOCSave++;
997       Simplified = true;
998     } else {
999       NumKept++;
1000     }
1001   }
1002 
1003   if (NumKept > 1)
1004     MultiTOCSaves++;
1005 
1006   return Simplified;
1007 }
1008 
1009 // If multiple conditional branches are executed based on the (essentially)
1010 // same comparison, we merge compare instructions into one and make multiple
1011 // conditional branches on this comparison.
1012 // For example,
1013 //   if (a == 0) { ... }
1014 //   else if (a < 0) { ... }
1015 // can be executed by one compare and two conditional branches instead of
1016 // two pairs of a compare and a conditional branch.
1017 //
1018 // This method merges two compare instructions in two MBBs and modifies the
1019 // compare and conditional branch instructions if needed.
1020 // For the above example, the input for this pass looks like:
1021 //   cmplwi r3, 0
1022 //   beq    0, .LBB0_3
1023 //   cmpwi  r3, -1
1024 //   bgt    0, .LBB0_4
1025 // So, before merging two compares, we need to modify these instructions as
1026 //   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
1027 //   beq    0, .LBB0_3
//   cmpwi  r3, 0       ; greater than -1 means greater or equal to 0
1029 //   bge    0, .LBB0_4
1030 
bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as a operand in both MBBs.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As partially redundant case, we additionally handle if MBB2 has one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //      |    \                     (w/ 1 succ) \     |    \
    //      |     \                                 \    |     \
    //      |                                        \   |
    //   --------                                     --------
    //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                         -------- and 2 succ)
    //      |    \                                       |    \
    //      |     \                                      |     \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    // The terminating branch of each block and the compare feeding it.
    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    // Pending adjustments; zero (or unchanged immediate) means "no change".
    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.

      // We cannot change opcode when comparing against an immediate
      // if the most significant bit of the immediate is one
      // due to the difference in sign extension.
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
        if (!I->getOperand(2).isImm())
          return false;
        int16_t Imm = (int16_t)I->getOperand(2).getImm();
        return Imm < 0;
      };

      if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      // getSrcVReg looks through full copies (and, for CMPI2, the PHI in
      // MBB2 on the edge from MBB1) to compare the underlying values.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in another compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If the immediates are not the same, we try to adjust by changing the
      // predicate; e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        // Each branch's immediate can be adjusted by at most one, so only
        // differences within [-2, 2] can possibly be reconciled.
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(CMPI2->dump());
    LLVM_DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle partially redundant case.
      if (SwapOperands) {
        unsigned Op1 = CMPI2->getOperand(1).getReg();
        unsigned Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      // Rewrite operands defined by a PHI in MBB2 with the value incoming
      // from MBBtoMoveCmp, so the compare stays valid after the move.
      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      // Move the compare into MBBtoMoveCmp, just before its terminator.
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      // Merge the two CR results with a PHI in MBB2 and branch on it.
      DebugLoc DL = CMPI2->getDebugLoc();
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    // The merged condition register now dies at the second branch.
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
                        << printMBBReference(*MBBtoMoveCmp)
                        << " to handle partial redundancy.\n");
      LLVM_DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}
1283 
1284 // We miss the opportunity to emit an RLDIC when lowering jump tables
1285 // since ISEL sees only a single basic block. When selecting, the clear
1286 // and shift left will be in different blocks.
1287 void PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
1288   if (MI.getOpcode() != PPC::RLDICR)
1289     return;
1290 
1291   unsigned SrcReg = MI.getOperand(1).getReg();
1292   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1293     return;
1294 
1295   MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1296   if (SrcMI->getOpcode() != PPC::RLDICL)
1297     return;
1298 
1299   MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1300   MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1301   MachineOperand MOpSHMI = MI.getOperand(2);
1302   MachineOperand MOpMEMI = MI.getOperand(3);
1303   if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1304         MOpMEMI.isImm()))
1305     return;
1306 
1307   uint64_t SHSrc = MOpSHSrc.getImm();
1308   uint64_t MBSrc = MOpMBSrc.getImm();
1309   uint64_t SHMI = MOpSHMI.getImm();
1310   uint64_t MEMI = MOpMEMI.getImm();
1311   uint64_t NewSH = SHSrc + SHMI;
1312   uint64_t NewMB = MBSrc - SHMI;
1313   if (NewMB > 63 || NewSH > 63)
1314     return;
1315 
1316   // The bits cleared with RLDICL are [0, MBSrc).
1317   // The bits cleared with RLDICR are (MEMI, 63].
1318   // After the sequence, the bits cleared are:
1319   // [0, MBSrc-SHMI) and (MEMI, 63).
1320   //
1321   // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1322   if ((63 - NewSH) != MEMI)
1323     return;
1324 
1325   LLVM_DEBUG(dbgs() << "Converting pair: ");
1326   LLVM_DEBUG(SrcMI->dump());
1327   LLVM_DEBUG(MI.dump());
1328 
1329   MI.setDesc(TII->get(PPC::RLDIC));
1330   MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1331   MI.getOperand(2).setImm(NewSH);
1332   MI.getOperand(3).setImm(NewMB);
1333 
1334   LLVM_DEBUG(dbgs() << "To: ");
1335   LLVM_DEBUG(MI.dump());
1336   NumRotatesCollapsed++;
1337 }
1338 
} // end anonymous namespace
1340 
// Register the pass with the PassRegistry so it can be named on the
// command line and shown in pass listings.
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

// Pass identification: the address of ID uniquely identifies this pass.
char PPCMIPeephole::ID = 0;
// Factory used by the PPC target to add this pass to its pipeline.
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
1349 
1350