xref: /llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 7c2b76f7cfe7493d7e437c55905c3a6b6bc4d574)
1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 //    its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 //    a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/PassRegistry.h"
45 #include "llvm/Support/CodeGen.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include <cassert>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "ppc-ctrloops"
53 
54 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56 
57 namespace {
58 class PPCCTRLoops : public MachineFunctionPass {
59 public:
60   static char ID;
61 
62   PPCCTRLoops() : MachineFunctionPass(ID) {
63     initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
64   }
65 
66   void getAnalysisUsage(AnalysisUsage &AU) const override {
67     AU.addRequired<MachineLoopInfo>();
68     MachineFunctionPass::getAnalysisUsage(AU);
69   }
70 
71   bool runOnMachineFunction(MachineFunction &MF) override;
72 
73 private:
74   const PPCInstrInfo *TII = nullptr;
75   MachineRegisterInfo *MRI = nullptr;
76 
77   bool processLoop(MachineLoop *ML);
78   bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79   void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80                          MachineInstr *Dec);
81   void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82 };
83 } // namespace
84 
85 char PPCCTRLoops::ID = 0;
86 
87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88                       false, false)
89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91                     false, false)
92 
93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94 
95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96   bool Changed = false;
97 
98   auto &MLI = getAnalysis<MachineLoopInfo>();
99   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100   MRI = &MF.getRegInfo();
101 
102   for (auto *ML : MLI) {
103     if (ML->isOutermost())
104       Changed |= processLoop(ML);
105   }
106 
107   return Changed;
108 }
109 
110 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
111   if (!CheckReads) {
112     // If we are only checking for defs, that is we are going to find
113     // definitions before MTCTRloop, for this case:
114     // CTR defination inside the callee of a call instruction will not impact
115     // the defination of MTCTRloop, so we can use definesRegister() for the
116     // check, no need to check the regmask.
117     return (MI->definesRegister(PPC::CTR) &&
118             !MI->registerDefIsDead(PPC::CTR)) ||
119            (MI->definesRegister(PPC::CTR8) &&
120             !MI->registerDefIsDead(PPC::CTR8));
121   }
122 
123   if ((MI->modifiesRegister(PPC::CTR) && !MI->registerDefIsDead(PPC::CTR)) ||
124       (MI->modifiesRegister(PPC::CTR8) && !MI->registerDefIsDead(PPC::CTR8)))
125     return true;
126 
127   if (MI->getDesc().isCall())
128     return true;
129 
130   // We define the CTR in the loop preheader, so if there is any CTR reader in
131   // the loop, we also can not use CTR loop form.
132   if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
133     return true;
134 
135   return false;
136 }
137 
138 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
139   bool Changed = false;
140 
141   // Align with HardwareLoop pass, process inner loops first.
142   for (MachineLoop *I : *ML)
143     Changed |= processLoop(I);
144 
145   // If any inner loop is changed, outter loop must be without hardware loop
146   // intrinsics.
147   if (Changed)
148     return true;
149 
150   auto IsLoopStart = [](MachineInstr &MI) {
151     return MI.getOpcode() == PPC::MTCTRloop ||
152            MI.getOpcode() == PPC::MTCTR8loop;
153   };
154 
155   auto SearchForStart =
156       [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
157     for (auto &MI : *MBB) {
158       if (IsLoopStart(MI))
159         return &MI;
160     }
161     return nullptr;
162   };
163 
164   MachineInstr *Start = nullptr;
165   MachineInstr *Dec = nullptr;
166   bool InvalidCTRLoop = false;
167 
168   MachineBasicBlock *Preheader = ML->getLoopPreheader();
169   // If there is no preheader for this loop, there must be no MTCTRloop
170   // either.
171   if (!Preheader)
172     return false;
173 
174   Start = SearchForStart(Preheader);
175   // This is not a CTR loop candidate.
176   if (!Start)
177     return false;
178 
179   // If CTR is live to the preheader, we can not redefine the CTR register.
180   if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
181     InvalidCTRLoop = true;
182 
183   // Make sure there is also no CTR clobber in the block preheader between the
184   // begin and MTCTR.
185   for (MachineBasicBlock::reverse_instr_iterator I =
186            std::next(Start->getReverseIterator());
187        I != Preheader->instr_rend(); ++I)
188     // Only check the definitions of CTR. If there is non-dead definition for
189     // the CTR, we conservatively don't generate a CTR loop.
190     if (isCTRClobber(&*I, /* CheckReads */ false)) {
191       InvalidCTRLoop = true;
192       break;
193     }
194 
195   // Make sure there is also no CTR clobber/user in the block preheader between
196   // MTCTR and the end.
197   for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
198        I != Preheader->instr_end(); ++I)
199     if (isCTRClobber(&*I, /* CheckReads */ true)) {
200       InvalidCTRLoop = true;
201       break;
202     }
203 
204   // Find the CTR loop components and decide whether or not to fall back to a
205   // normal loop.
206   for (auto *MBB : reverse(ML->getBlocks())) {
207     for (auto &MI : *MBB) {
208       if (MI.getOpcode() == PPC::DecreaseCTRloop ||
209           MI.getOpcode() == PPC::DecreaseCTR8loop)
210         Dec = &MI;
211       else if (!InvalidCTRLoop)
212         // If any instruction clobber CTR, then we can not generate a CTR loop.
213         InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
214     }
215     if (Dec && InvalidCTRLoop)
216       break;
217   }
218 
219   assert(Dec && "CTR loop is not complete!");
220 
221   if (InvalidCTRLoop) {
222     expandNormalLoops(ML, Start, Dec);
223     ++NumNormalLoops;
224   }
225   else {
226     expandCTRLoops(ML, Start, Dec);
227     ++NumCTRLoops;
228   }
229   return true;
230 }
231 
232 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
233                                     MachineInstr *Dec) {
234   bool Is64Bit =
235       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
236 
237   MachineBasicBlock *Preheader = Start->getParent();
238   MachineBasicBlock *Exiting = Dec->getParent();
239   assert((Preheader && Exiting) &&
240          "Preheader and exiting should exist for CTR loop!");
241 
242   assert(Dec->getOperand(1).getImm() == 1 &&
243          "Loop decrement stride must be 1");
244 
245   unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
246   unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
247 
248   Register PHIDef =
249       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
250                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
251 
252   Start->getParent()->getParent()->getProperties().reset(
253       MachineFunctionProperties::Property::NoPHIs);
254 
255   // Generate "PHI" in the header block.
256   auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
257                         DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
258   PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
259 
260   Register ADDIDef =
261       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
262                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
263   // Generate "addi -1" in the exiting block.
264   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
265       .addReg(PHIDef)
266       .addImm(-1);
267 
268   // Add other inputs for the PHI node.
269   if (ML->isLoopLatch(Exiting)) {
270     // Normally there must be only two predecessors for the loop header, one is
271     // the Preheader and the other one is loop latch Exiting. In hardware loop
272     // insertion pass, the block containing DecreaseCTRloop must dominate all
273     // loop latches. So there must be only one latch.
274     // But there are some optimizations after ISEL, like tail duplicator, may
275     // merge the two-predecessor loop header with its successor. If the
276     // successor happens to be a header of nest loop, then we will have a header
277     // which has more than 2 predecessors.
278     assert(llvm::is_contained(ML->getHeader()->predecessors(), Exiting) &&
279            "Loop latch is not loop header predecessor!");
280     assert(llvm::is_contained(ML->getHeader()->predecessors(), Preheader) &&
281            "Loop preheader is not loop header predecessor!");
282 
283     PHIMIB.addReg(ADDIDef).addMBB(Exiting);
284 
285     if (ML->getHeader()->pred_size() > 2) {
286       Register HeaderIncoming = MRI->createVirtualRegister(
287           Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
288                   : &PPC::GPRC_and_GPRC_NOR0RegClass);
289       BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), DebugLoc(),
290               TII->get(TargetOpcode::COPY), HeaderIncoming)
291           .addReg(PHIDef);
292 
293       for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
294         if (P != Preheader && P != Exiting)
295           PHIMIB.addReg(HeaderIncoming).addMBB(P);
296       }
297     }
298   } else {
299     // If the block containing DecreaseCTRloop is not a loop latch, we can use
300     // ADDIDef as the value for all other blocks for the PHI. In hardware loop
301     // insertion pass, the block containing DecreaseCTRloop must dominate all
302     // loop latches.
303     for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
304       if (ML->contains(P)) {
305         assert(ML->isLoopLatch(P) &&
306                "Loop's header in-loop predecessor is not loop latch!");
307         PHIMIB.addReg(ADDIDef).addMBB(P);
308       } else
309         assert(P == Preheader &&
310                "CTR loop should not be generated for irreducible loop!");
311     }
312   }
313 
314   // Generate the compare in the exiting block.
315   Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
316   auto CMPMIB =
317       BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
318           .addReg(ADDIDef)
319           .addImm(0);
320 
321   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
322           Dec->getOperand(0).getReg())
323       .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
324 
325   // Remove the pseudo instructions.
326   Start->eraseFromParent();
327   Dec->eraseFromParent();
328 }
329 
330 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
331                                  MachineInstr *Dec) {
332   bool Is64Bit =
333       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
334 
335   MachineBasicBlock *Preheader = Start->getParent();
336   MachineBasicBlock *Exiting = Dec->getParent();
337 
338   (void)Preheader;
339   assert((Preheader && Exiting) &&
340          "Preheader and exiting should exist for CTR loop!");
341 
342   assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
343 
344   unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
345   unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
346   auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
347   assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
348          "There should be only one user for loop decrement pseudo!");
349 
350   unsigned Opcode = 0;
351   switch (BrInstr->getOpcode()) {
352   case PPC::BC:
353     Opcode = BDNZOpcode;
354     (void) ML;
355     assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
356            "Invalid ctr loop!");
357     break;
358   case PPC::BCn:
359     Opcode = BDZOpcode;
360     assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
361            "Invalid ctr loop!");
362     break;
363   default:
364     llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
365   }
366 
367   // Generate "bdnz/bdz" in the exiting block just before the terminator.
368   BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
369       .addMBB(BrInstr->getOperand(1).getMBB());
370 
371   // Remove the pseudo instructions.
372   BrInstr->eraseFromParent();
373   Dec->eraseFromParent();
374 }
375