xref: /llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (revision 614453b797210ab3822d78f1b907729c8be34ba6)
1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 //
10 // This pass performs peephole optimizations to clean up ugly code
11 // sequences at the MachineInstruction layer.  It runs at the end of
12 // the SSA phases, following VSX swap removal.  A pass of dead code
13 // elimination follows this one for quick clean-up of any dead
14 // instructions introduced here.  Although we could do this as callbacks
15 // from the generic peephole pass, this would have a couple of bad
16 // effects:  it might remove optimization opportunities for VSX swap
17 // removal, and it would miss cleanups made possible following VSX
18 // swap removal.
19 //
20 //===---------------------------------------------------------------------===//
21 
22 #include "PPC.h"
23 #include "PPCInstrBuilder.h"
24 #include "PPCInstrInfo.h"
25 #include "PPCTargetMachine.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/Support/Debug.h"
30 #include "MCTargetDesc/PPCPredicates.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "ppc-mi-peepholes"
35 
36 namespace llvm {
37   void initializePPCMIPeepholePass(PassRegistry&);
38 }
39 
40 namespace {
41 
// A MachineFunction pass that performs PowerPC-specific peephole
// cleanups on SSA-form machine IR (run after VSX swap removal).
struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;   // Target instruction info for building new MIs.
  MachineFunction *MF;       // Function currently being optimized.
  MachineRegisterInfo *MRI;  // Virtual-register def/use information.

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform per-instruction peepholes (vector splat/swap cleanups).
  bool simplifyCode(void);

  // Merge essentially-identical compares feeding conditional branches
  // in adjacent basic blocks.
  bool eliminateRedundantCompare(void);

  // Find the "true" register represented by SrcReg (following chains
  // of copies and subreg_to_reg operations).
  unsigned lookThruCopyLike(unsigned SrcReg);

public:
  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Honor -O0 / optnone by skipping the function entirely.
    if (skipFunction(*MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};
76 
77 // Initialize class variables.
// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  // Cache the PPC instruction info; needed for BuildMI/TII->get below.
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  DEBUG(MF->dump());
}
85 
86 // Perform peephole optimizations.
87 bool PPCMIPeephole::simplifyCode(void) {
88   bool Simplified = false;
89   MachineInstr* ToErase = nullptr;
90 
91   for (MachineBasicBlock &MBB : *MF) {
92     for (MachineInstr &MI : MBB) {
93 
94       // If the previous instruction was marked for elimination,
95       // remove it now.
96       if (ToErase) {
97         ToErase->eraseFromParent();
98         ToErase = nullptr;
99       }
100 
101       // Ignore debug instructions.
102       if (MI.isDebugValue())
103         continue;
104 
105       // Per-opcode peepholes.
106       switch (MI.getOpcode()) {
107 
108       default:
109         break;
110 
111       case PPC::XXPERMDI: {
112         // Perform simplifications of 2x64 vector swaps and splats.
113         // A swap is identified by an immediate value of 2, and a splat
114         // is identified by an immediate value of 0 or 3.
115         int Immed = MI.getOperand(3).getImm();
116 
117         if (Immed != 1) {
118 
119           // For each of these simplifications, we need the two source
120           // regs to match.  Unfortunately, MachineCSE ignores COPY and
121           // SUBREG_TO_REG, so for example we can see
122           //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
123           // We have to look through chains of COPY and SUBREG_TO_REG
124           // to find the real source values for comparison.
125           unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
126           unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
127 
128           if (TrueReg1 == TrueReg2
129               && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
130             MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
131             unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
132 
133             // If this is a splat fed by a splatting load, the splat is
134             // redundant. Replace with a copy. This doesn't happen directly due
135             // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
136             // a load of a double to a vector of 64-bit integers.
137             auto isConversionOfLoadAndSplat = [=]() -> bool {
138               if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
139                 return false;
140               unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
141               if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
142                 MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
143                 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
144                   return true;
145               }
146               return false;
147             };
148             if (DefMI && (Immed == 0 || Immed == 3)) {
149               if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
150                 DEBUG(dbgs()
151                       << "Optimizing load-and-splat/splat "
152                       "to load-and-splat/copy: ");
153                 DEBUG(MI.dump());
154                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
155                         MI.getOperand(0).getReg())
156                     .add(MI.getOperand(1));
157                 ToErase = &MI;
158                 Simplified = true;
159               }
160             }
161 
162             // If this is a splat or a swap fed by another splat, we
163             // can replace it with a copy.
164             if (DefOpc == PPC::XXPERMDI) {
165               unsigned FeedImmed = DefMI->getOperand(3).getImm();
166               unsigned FeedReg1
167                 = lookThruCopyLike(DefMI->getOperand(1).getReg());
168               unsigned FeedReg2
169                 = lookThruCopyLike(DefMI->getOperand(2).getReg());
170 
171               if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
172                 DEBUG(dbgs()
173                       << "Optimizing splat/swap or splat/splat "
174                       "to splat/copy: ");
175                 DEBUG(MI.dump());
176                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
177                         MI.getOperand(0).getReg())
178                     .add(MI.getOperand(1));
179                 ToErase = &MI;
180                 Simplified = true;
181               }
182 
183               // If this is a splat fed by a swap, we can simplify modify
184               // the splat to splat the other value from the swap's input
185               // parameter.
186               else if ((Immed == 0 || Immed == 3)
187                        && FeedImmed == 2 && FeedReg1 == FeedReg2) {
188                 DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
189                 DEBUG(MI.dump());
190                 MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
191                 MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
192                 MI.getOperand(3).setImm(3 - Immed);
193                 Simplified = true;
194               }
195 
196               // If this is a swap fed by a swap, we can replace it
197               // with a copy from the first swap's input.
198               else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
199                 DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
200                 DEBUG(MI.dump());
201                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
202                         MI.getOperand(0).getReg())
203                     .add(DefMI->getOperand(1));
204                 ToErase = &MI;
205                 Simplified = true;
206               }
207             } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
208                        (DefMI->getOperand(2).getImm() == 0 ||
209                         DefMI->getOperand(2).getImm() == 3)) {
210               // Splat fed by another splat - switch the output of the first
211               // and remove the second.
212               DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
213               ToErase = &MI;
214               Simplified = true;
215               DEBUG(dbgs() << "Removing redundant splat: ");
216               DEBUG(MI.dump());
217             }
218           }
219         }
220         break;
221       }
222       case PPC::VSPLTB:
223       case PPC::VSPLTH:
224       case PPC::XXSPLTW: {
225         unsigned MyOpcode = MI.getOpcode();
226         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
227         unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
228         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
229           break;
230         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
231         if (!DefMI)
232           break;
233         unsigned DefOpcode = DefMI->getOpcode();
234         auto isConvertOfSplat = [=]() -> bool {
235           if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
236             return false;
237           unsigned ConvReg = DefMI->getOperand(1).getReg();
238           if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
239             return false;
240           MachineInstr *Splt = MRI->getVRegDef(ConvReg);
241           return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
242             Splt->getOpcode() == PPC::XXSPLTW);
243         };
244         bool AlreadySplat = (MyOpcode == DefOpcode) ||
245           (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
246           (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
247           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
248           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
249           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
250           (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
251         // If the instruction[s] that feed this splat have already splat
252         // the value, this splat is redundant.
253         if (AlreadySplat) {
254           DEBUG(dbgs() << "Changing redundant splat to a copy: ");
255           DEBUG(MI.dump());
256           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
257                   MI.getOperand(0).getReg())
258               .add(MI.getOperand(OpNo));
259           ToErase = &MI;
260           Simplified = true;
261         }
262         // Splat fed by a shift. Usually when we align value to splat into
263         // vector element zero.
264         if (DefOpcode == PPC::XXSLDWI) {
265           unsigned ShiftRes = DefMI->getOperand(0).getReg();
266           unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
267           unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
268           unsigned ShiftImm = DefMI->getOperand(3).getImm();
269           unsigned SplatImm = MI.getOperand(2).getImm();
270           if (ShiftOp1 == ShiftOp2) {
271             unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
272             if (MRI->hasOneNonDBGUse(ShiftRes)) {
273               DEBUG(dbgs() << "Removing redundant shift: ");
274               DEBUG(DefMI->dump());
275               ToErase = DefMI;
276             }
277             Simplified = true;
278             DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
279                   " to " << NewElem << " in instruction: ");
280             DEBUG(MI.dump());
281             MI.getOperand(1).setReg(ShiftOp1);
282             MI.getOperand(2).setImm(NewElem);
283           }
284         }
285         break;
286       }
287       case PPC::XVCVDPSP: {
288         // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
289         unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
290         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
291           break;
292         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
293 
294         // This can occur when building a vector of single precision or integer
295         // values.
296         if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
297           unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
298           unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
299           if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
300               !TargetRegisterInfo::isVirtualRegister(DefsReg2))
301             break;
302           MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
303           MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
304 
305           if (!P1 || !P2)
306             break;
307 
308           // Remove the passed FRSP instruction if it only feeds this MI and
309           // set any uses of that FRSP (in this MI) to the source of the FRSP.
310           auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
311             if (RoundInstr->getOpcode() == PPC::FRSP &&
312                 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
313               Simplified = true;
314               unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
315               unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
316               MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
317               for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
318                 if (Use.getOperand(i).isReg() &&
319                     Use.getOperand(i).getReg() == FRSPDefines)
320                   Use.getOperand(i).setReg(ConvReg1);
321               DEBUG(dbgs() << "Removing redundant FRSP:\n");
322               DEBUG(RoundInstr->dump());
323               DEBUG(dbgs() << "As it feeds instruction:\n");
324               DEBUG(MI.dump());
325               DEBUG(dbgs() << "Through instruction:\n");
326               DEBUG(DefMI->dump());
327               RoundInstr->eraseFromParent();
328             }
329           };
330 
331           // If the input to XVCVDPSP is a vector that was built (even
332           // partially) out of FRSP's, the FRSP(s) can safely be removed
333           // since this instruction performs the same operation.
334           if (P1 != P2) {
335             removeFRSPIfPossible(P1);
336             removeFRSPIfPossible(P2);
337             break;
338           }
339           removeFRSPIfPossible(P1);
340         }
341         break;
342       }
343       }
344     }
345     // If the last instruction was marked for elimination,
346     // remove it now.
347     if (ToErase) {
348       ToErase->eraseFromParent();
349       ToErase = nullptr;
350     }
351   }
352 
353   // We try to eliminate redundant compare instruction.
354   Simplified |= eliminateRedundantCompare();
355 
356   return Simplified;
357 }
358 
359 // helper functions for eliminateRedundantCompare
360 static bool isEqOrNe(MachineInstr *BI) {
361   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
362   unsigned PredCond = PPC::getPredicateCondition(Pred);
363   return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
364 }
365 
366 static bool isSupportedCmpOp(unsigned opCode) {
367   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
368           opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
369           opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
370           opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
371 }
372 
373 static bool is64bitCmpOp(unsigned opCode) {
374   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
375           opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
376 }
377 
378 static bool isSignedCmpOp(unsigned opCode) {
379   return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
380           opCode == PPC::CMPDI || opCode == PPC::CMPWI);
381 }
382 
383 static unsigned getSignedCmpOpCode(unsigned opCode) {
384   if (opCode == PPC::CMPLD)  return PPC::CMPD;
385   if (opCode == PPC::CMPLW)  return PPC::CMPW;
386   if (opCode == PPC::CMPLDI) return PPC::CMPDI;
387   if (opCode == PPC::CMPLWI) return PPC::CMPWI;
388   return opCode;
389 }
390 
391 // We can decrement immediate x in (GE x) by changing it to (GT x-1) or
392 // (LT x) to (LE x-1)
393 static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
394   uint64_t Imm = CMPI->getOperand(2).getImm();
395   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
396   if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
397     return 0;
398 
399   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
400   unsigned PredCond = PPC::getPredicateCondition(Pred);
401   unsigned PredHint = PPC::getPredicateHint(Pred);
402   if (PredCond == PPC::PRED_GE)
403     return PPC::getPredicate(PPC::PRED_GT, PredHint);
404   if (PredCond == PPC::PRED_LT)
405     return PPC::getPredicate(PPC::PRED_LE, PredHint);
406 
407   return 0;
408 }
409 
410 // We can increment immediate x in (GT x) by changing it to (GE x+1) or
411 // (LE x) to (LT x+1)
412 static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
413   uint64_t Imm = CMPI->getOperand(2).getImm();
414   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
415   if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
416     return 0;
417 
418   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
419   unsigned PredCond = PPC::getPredicateCondition(Pred);
420   unsigned PredHint = PPC::getPredicateHint(Pred);
421   if (PredCond == PPC::PRED_GT)
422     return PPC::getPredicate(PPC::PRED_GE, PredHint);
423   if (PredCond == PPC::PRED_LE)
424     return PPC::getPredicate(PPC::PRED_LT, PredHint);
425 
426   return 0;
427 }
428 
429 static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
430                                           MachineRegisterInfo *MRI) {
431 
432   auto isEligibleBB = [&](MachineBasicBlock &BB) {
433     auto BII = BB.getFirstInstrTerminator();
434     // We optimize BBs ending with a conditional branch.
435     // We check only for BCC here, not BCCLR, because BCCLR
436     // will be formed only later in the pipeline.
437     if (BB.succ_size() == 2 &&
438         BII != BB.instr_end() &&
439         (*BII).getOpcode() == PPC::BCC &&
440         (*BII).getOperand(1).isReg()) {
441       // We optimize only if the condition code is used only by one BCC.
442       unsigned CndReg = (*BII).getOperand(1).getReg();
443       if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
444           !MRI->hasOneNonDBGUse(CndReg))
445         return false;
446 
447       // We skip this BB if a physical register is used in comparison.
448       MachineInstr *CMPI = MRI->getVRegDef(CndReg);
449       for (MachineOperand &MO : CMPI->operands())
450         if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
451           return false;
452 
453       return true;
454     }
455     return false;
456   };
457 
458   if (MBB.pred_size() != 1)
459     return false;
460 
461   MachineBasicBlock *PredMBB = *MBB.pred_begin();
462   if (isEligibleBB(MBB) && isEligibleBB(*PredMBB))
463     return true;
464 
465   return false;
466 }
467 
468 // If multiple conditional branches are executed based on the (essentially)
469 // same comparison, we merge compare instructions into one and make multiple
470 // conditional branches on this comparison.
471 // For example,
472 //   if (a == 0) { ... }
473 //   else if (a < 0) { ... }
474 // can be executed by one compare and two conditional branches instead of
475 // two pairs of a compare and a conditional branch.
476 //
477 // This method merges two compare instructions in two MBBs and modifies the
478 // compare and conditional branch instructions if needed.
479 // For the above example, the input for this pass looks like:
480 //   cmplwi r3, 0
481 //   beq    0, .LBB0_3
482 //   cmpwi  r3, -1
483 //   bgt    0, .LBB0_4
484 // So, before merging two compares, we need to modify these instructions as
485 //   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
486 //   beq    0, .LBB0_3
//   cmpwi  r3, 0       ; greater than -1 means greater than or equal to 0
488 //   bge    0, .LBB0_4
489 
// Merge the compares feeding two conditional branches (one in MBB2, one
// in its sole predecessor MBB1) into a single compare, rewriting the
// second branch to consume the first compare's CR result.  Opcode,
// predicates, and immediates are adjusted as needed before merging.
// Returns true if any pair was merged.
bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    // We only consider two basic blocks MBB1 and MBB2 if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as an operand in both MBBs.
    if (!eligibleForCompareElimination(MBB2, MRI))
      continue;

    MachineBasicBlock *MBB1 = *MBB2.pred_begin();
    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons.
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.
      // (For EQ/NE, signed and unsigned compares produce the same result.)
      if (isEqOrNe(BI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      unsigned Cmp1Operand1 = CMPI1->getOperand(1).getReg();
      unsigned Cmp1Operand2 = CMPI1->getOperand(2).getReg();
      unsigned Cmp2Operand1 = CMPI2->getOperand(1).getReg();
      unsigned Cmp2Operand2 = CMPI2->getOperand(2).getReg();
      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()){
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      if (CMPI1->getOperand(1).getReg() != CMPI2->getOperand(1).getReg())
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If immediate are not same, we try to adjust by changing predicate;
      // e.g. GT imm means GE (imm+1).  Equality predicates cannot be
      // adjusted this way, hence the isEqOrNe checks.
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        // Each immediate can move by at most 1, so a gap larger than 2
        // (one step on each side) cannot be closed.
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(CMPI2->dump());
    DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    // We finally eliminate compare instruction in MBB2.
    // BI2 now consumes CMPI1's result, so the kill moves from BI1 to BI2.
    BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);
    CMPI2->eraseFromParent();

    DEBUG(dbgs() << "into a compare and two branches:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(BI2->dump());

    Simplified = true;
  }

  return Simplified;
}
650 
651 // This is used to find the "true" source register for an
652 // XXPERMDI instruction, since MachineCSE does not handle the
653 // "copy-like" operations (Copy and SubregToReg).  Returns
654 // the original SrcReg unless it is the target of a copy-like
655 // operation, in which case we chain backwards through all
656 // such operations to the ultimate source register.  If a
657 // physical register is encountered, we stop the search.
658 unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
659 
660   while (true) {
661 
662     MachineInstr *MI = MRI->getVRegDef(SrcReg);
663     if (!MI->isCopyLike())
664       return SrcReg;
665 
666     unsigned CopySrcReg;
667     if (MI->isCopy())
668       CopySrcReg = MI->getOperand(1).getReg();
669     else {
670       assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
671       CopySrcReg = MI->getOperand(2).getReg();
672     }
673 
674     if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
675       return CopySrcReg;
676 
677     SrcReg = CopySrcReg;
678   }
679 }
680 
} // end anonymous namespace
682 
// Register the pass with the LLVM pass registry.
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
// Factory used by PPCTargetMachine to add this pass to the codegen pipeline.
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
691 
692