xref: /llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision dff8227189b4b6008143a4dd28f50eaa269a55c8)
1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 //    its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 //    a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/PassRegistry.h"
45 #include "llvm/Support/CodeGen.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include <cassert>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "ppc-ctrloops"
53 
54 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56 
57 namespace {
58 class PPCCTRLoops : public MachineFunctionPass {
59 public:
60   static char ID;
61 
62   PPCCTRLoops() : MachineFunctionPass(ID) {
63     initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
64   }
65 
66   void getAnalysisUsage(AnalysisUsage &AU) const override {
67     AU.addRequired<MachineLoopInfo>();
68     MachineFunctionPass::getAnalysisUsage(AU);
69   }
70 
71   bool runOnMachineFunction(MachineFunction &MF) override;
72 
73 private:
74   const PPCInstrInfo *TII = nullptr;
75   MachineRegisterInfo *MRI = nullptr;
76 
77   bool processLoop(MachineLoop *ML);
78   bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79   void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80                          MachineInstr *Dec);
81   void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82 };
83 } // namespace
84 
85 char PPCCTRLoops::ID = 0;
86 
87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88                       false, false)
89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91                     false, false)
92 
93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94 
95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96   bool Changed = false;
97 
98   auto &MLI = getAnalysis<MachineLoopInfo>();
99   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100   MRI = &MF.getRegInfo();
101 
102   for (auto *ML : MLI) {
103     if (ML->isOutermost())
104       Changed |= processLoop(ML);
105   }
106 
107   return Changed;
108 }
109 
110 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
111   if (!CheckReads) {
112     // If we are only checking for defs, that is we are going to find
113     // definitions before MTCTRloop, for this case:
114     // CTR defination inside the callee of a call instruction will not impact
115     // the defination of MTCTRloop, so we can use definesRegister() for the
116     // check, no need to check the regmask.
117     return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
118   }
119 
120   if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
121     return true;
122 
123   if (MI->getDesc().isCall())
124     return true;
125 
126   // We define the CTR in the loop preheader, so if there is any CTR reader in
127   // the loop, we also can not use CTR loop form.
128   if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
129     return true;
130 
131   return false;
132 }
133 
134 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
135   bool Changed = false;
136 
137   // Align with HardwareLoop pass, process inner loops first.
138   for (MachineLoop *I : *ML)
139     Changed |= processLoop(I);
140 
141   // If any inner loop is changed, outter loop must be without hardware loop
142   // intrinsics.
143   if (Changed)
144     return true;
145 
146   auto IsLoopStart = [](MachineInstr &MI) {
147     return MI.getOpcode() == PPC::MTCTRloop ||
148            MI.getOpcode() == PPC::MTCTR8loop;
149   };
150 
151   auto SearchForStart =
152       [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
153     for (auto &MI : *MBB) {
154       if (IsLoopStart(MI))
155         return &MI;
156     }
157     return nullptr;
158   };
159 
160   MachineInstr *Start = nullptr;
161   MachineInstr *Dec = nullptr;
162   bool InvalidCTRLoop = false;
163 
164   MachineBasicBlock *Preheader = ML->getLoopPreheader();
165   // If there is no preheader for this loop, there must be no MTCTRloop
166   // either.
167   if (!Preheader)
168     return false;
169 
170   Start = SearchForStart(Preheader);
171   // This is not a CTR loop candidate.
172   if (!Start)
173     return false;
174 
175   // If CTR is live to the preheader, we can not redefine the CTR register.
176   if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
177     InvalidCTRLoop = true;
178 
179   // Make sure there is also no CTR clobber in the block preheader between the
180   // begin and MTCTR.
181   for (MachineBasicBlock::reverse_instr_iterator I =
182            std::next(Start->getReverseIterator());
183        I != Preheader->instr_rend(); ++I)
184     // Only check the definitions of CTR. If there is non-dead definition for
185     // the CTR, we conservatively don't generate a CTR loop.
186     if (isCTRClobber(&*I, /* CheckReads */ false)) {
187       InvalidCTRLoop = true;
188       break;
189     }
190 
191   // Make sure there is also no CTR clobber/user in the block preheader between
192   // MTCTR and the end.
193   for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
194        I != Preheader->instr_end(); ++I)
195     if (isCTRClobber(&*I, /* CheckReads */ true)) {
196       InvalidCTRLoop = true;
197       break;
198     }
199 
200   // Find the CTR loop components and decide whether or not to fall back to a
201   // normal loop.
202   for (auto *MBB : reverse(ML->getBlocks())) {
203     for (auto &MI : *MBB) {
204       if (MI.getOpcode() == PPC::DecreaseCTRloop ||
205           MI.getOpcode() == PPC::DecreaseCTR8loop)
206         Dec = &MI;
207       else if (!InvalidCTRLoop)
208         // If any instruction clobber CTR, then we can not generate a CTR loop.
209         InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
210     }
211     if (Dec && InvalidCTRLoop)
212       break;
213   }
214 
215   assert(Dec && "CTR loop is not complete!");
216 
217   if (InvalidCTRLoop) {
218     expandNormalLoops(ML, Start, Dec);
219     ++NumNormalLoops;
220   }
221   else {
222     expandCTRLoops(ML, Start, Dec);
223     ++NumCTRLoops;
224   }
225   return true;
226 }
227 
228 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
229                                     MachineInstr *Dec) {
230   bool Is64Bit =
231       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
232 
233   MachineBasicBlock *Preheader = Start->getParent();
234   MachineBasicBlock *Exiting = Dec->getParent();
235   assert((Preheader && Exiting) &&
236          "Preheader and exiting should exist for CTR loop!");
237 
238   assert(Dec->getOperand(1).getImm() == 1 &&
239          "Loop decrement stride must be 1");
240 
241   unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
242   unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
243 
244   Register PHIDef =
245       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
246                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
247 
248   Start->getParent()->getParent()->getProperties().reset(
249       MachineFunctionProperties::Property::NoPHIs);
250 
251   // Generate "PHI" in the header block.
252   auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
253                         DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
254   PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
255 
256   Register ADDIDef =
257       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
258                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
259   // Generate "addi -1" in the exiting block.
260   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
261       .addReg(PHIDef)
262       .addImm(-1);
263 
264   // Add other inputs for the PHI node.
265   if (ML->isLoopLatch(Exiting)) {
266     // There must be only two predecessors for the loop header, one is the
267     // Preheader and the other one is loop latch Exiting. In hardware loop
268     // insertion pass, the block containing DecreaseCTRloop must dominate all
269     // loop latches. So there must be only one latch.
270     assert(ML->getHeader()->pred_size() == 2 &&
271            "Loop header predecessor is not right!");
272     PHIMIB.addReg(ADDIDef).addMBB(Exiting);
273   } else {
274     // If the block containing DecreaseCTRloop is not a loop latch, we can use
275     // ADDIDef as the value for all other blocks for the PHI. In hardware loop
276     // insertion pass, the block containing DecreaseCTRloop must dominate all
277     // loop latches.
278     for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
279       if (ML->contains(P)) {
280         assert(ML->isLoopLatch(P) &&
281                "Loop's header in-loop predecessor is not loop latch!");
282         PHIMIB.addReg(ADDIDef).addMBB(P);
283       } else
284         assert(P == Preheader &&
285                "CTR loop should not be generated for irreducible loop!");
286     }
287   }
288 
289   // Generate the compare in the exiting block.
290   Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
291   auto CMPMIB =
292       BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
293           .addReg(ADDIDef)
294           .addImm(0);
295 
296   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
297           Dec->getOperand(0).getReg())
298       .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
299 
300   // Remove the pseudo instructions.
301   Start->eraseFromParent();
302   Dec->eraseFromParent();
303 }
304 
305 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
306                                  MachineInstr *Dec) {
307   bool Is64Bit =
308       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
309 
310   MachineBasicBlock *Preheader = Start->getParent();
311   MachineBasicBlock *Exiting = Dec->getParent();
312 
313   (void)Preheader;
314   assert((Preheader && Exiting) &&
315          "Preheader and exiting should exist for CTR loop!");
316 
317   assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
318 
319   unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
320   unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
321   auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
322   assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
323          "There should be only one user for loop decrement pseudo!");
324 
325   unsigned Opcode = 0;
326   switch (BrInstr->getOpcode()) {
327   case PPC::BC:
328     Opcode = BDNZOpcode;
329     (void) ML;
330     assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
331            "Invalid ctr loop!");
332     break;
333   case PPC::BCn:
334     Opcode = BDZOpcode;
335     assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
336            "Invalid ctr loop!");
337     break;
338   default:
339     llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
340   }
341 
342   // Generate "bdnz/bdz" in the exiting block just before the terminator.
343   BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
344       .addMBB(BrInstr->getOperand(1).getMBB());
345 
346   // Remove the pseudo instructions.
347   BrInstr->eraseFromParent();
348   Dec->eraseFromParent();
349 }
350