xref: /llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision f71cb9dbb739bb58ce7e52e49fe384ff2ff11687)
1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 //    its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 //    a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/PassRegistry.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include <cassert>
47 
48 using namespace llvm;
49 
50 #define DEBUG_TYPE "ppc-ctrloops"
51 
52 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
53 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
54 
55 namespace {
56 class PPCCTRLoops : public MachineFunctionPass {
57 public:
58   static char ID;
59 
60   PPCCTRLoops() : MachineFunctionPass(ID) {
61     initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
62   }
63 
64   void getAnalysisUsage(AnalysisUsage &AU) const override {
65     AU.addRequired<MachineLoopInfoWrapperPass>();
66     MachineFunctionPass::getAnalysisUsage(AU);
67   }
68 
69   bool runOnMachineFunction(MachineFunction &MF) override;
70 
71 private:
72   const PPCInstrInfo *TII = nullptr;
73   MachineRegisterInfo *MRI = nullptr;
74 
75   bool processLoop(MachineLoop *ML);
76   bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
77   void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
78                          MachineInstr *Dec);
79   void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
80 };
81 } // namespace
82 
83 char PPCCTRLoops::ID = 0;
84 
85 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
86                       false, false)
87 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
88 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
89                     false, false)
90 
91 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
92 
93 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
94   bool Changed = false;
95 
96   auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
97   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
98   MRI = &MF.getRegInfo();
99 
100   for (auto *ML : MLI) {
101     if (ML->isOutermost())
102       Changed |= processLoop(ML);
103   }
104 
105 #ifndef NDEBUG
106   for (const MachineBasicBlock &BB : MF) {
107     for (const MachineInstr &I : BB)
108       assert((I.getOpcode() != PPC::DecreaseCTRloop &&
109               I.getOpcode() != PPC::DecreaseCTR8loop) &&
110              "CTR loop pseudo is not expanded!");
111   }
112 #endif
113 
114   return Changed;
115 }
116 
117 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
118   if (!CheckReads) {
119     // If we are only checking for defs, that is we are going to find
120     // definitions before MTCTRloop, for this case:
121     // CTR defination inside the callee of a call instruction will not impact
122     // the defination of MTCTRloop, so we can use definesRegister() for the
123     // check, no need to check the regmask.
124     return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
125            MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);
126   }
127 
128   if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||
129       MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))
130     return true;
131 
132   if (MI->getDesc().isCall())
133     return true;
134 
135   // We define the CTR in the loop preheader, so if there is any CTR reader in
136   // the loop, we also can not use CTR loop form.
137   if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||
138       MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))
139     return true;
140 
141   return false;
142 }
143 
144 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
145   bool Changed = false;
146 
147   // Align with HardwareLoop pass, process inner loops first.
148   for (MachineLoop *I : *ML)
149     Changed |= processLoop(I);
150 
151   // If any inner loop is changed, outter loop must be without hardware loop
152   // intrinsics.
153   if (Changed)
154     return true;
155 
156   auto IsLoopStart = [](MachineInstr &MI) {
157     return MI.getOpcode() == PPC::MTCTRloop ||
158            MI.getOpcode() == PPC::MTCTR8loop;
159   };
160 
161   auto SearchForStart =
162       [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
163     for (auto &MI : *MBB) {
164       if (IsLoopStart(MI))
165         return &MI;
166     }
167     return nullptr;
168   };
169 
170   MachineInstr *Start = nullptr;
171   MachineInstr *Dec = nullptr;
172   bool InvalidCTRLoop = false;
173 
174   MachineBasicBlock *Preheader = ML->getLoopPreheader();
175   // If there is no preheader for this loop, there must be no MTCTRloop
176   // either.
177   if (!Preheader)
178     return false;
179 
180   Start = SearchForStart(Preheader);
181   // This is not a CTR loop candidate.
182   if (!Start)
183     return false;
184 
185   // If CTR is live to the preheader, we can not redefine the CTR register.
186   if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
187     InvalidCTRLoop = true;
188 
189   // Make sure there is also no CTR clobber in the block preheader between the
190   // begin and MTCTR.
191   for (MachineBasicBlock::reverse_instr_iterator I =
192            std::next(Start->getReverseIterator());
193        I != Preheader->instr_rend(); ++I)
194     // Only check the definitions of CTR. If there is non-dead definition for
195     // the CTR, we conservatively don't generate a CTR loop.
196     if (isCTRClobber(&*I, /* CheckReads */ false)) {
197       InvalidCTRLoop = true;
198       break;
199     }
200 
201   // Make sure there is also no CTR clobber/user in the block preheader between
202   // MTCTR and the end.
203   for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
204        I != Preheader->instr_end(); ++I)
205     if (isCTRClobber(&*I, /* CheckReads */ true)) {
206       InvalidCTRLoop = true;
207       break;
208     }
209 
210   // Find the CTR loop components and decide whether or not to fall back to a
211   // normal loop.
212   for (auto *MBB : reverse(ML->getBlocks())) {
213     for (auto &MI : *MBB) {
214       if (MI.getOpcode() == PPC::DecreaseCTRloop ||
215           MI.getOpcode() == PPC::DecreaseCTR8loop)
216         Dec = &MI;
217       else if (!InvalidCTRLoop)
218         // If any instruction clobber CTR, then we can not generate a CTR loop.
219         InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
220     }
221     if (Dec && InvalidCTRLoop)
222       break;
223   }
224 
225   assert(Dec && "CTR loop is not complete!");
226 
227   if (InvalidCTRLoop) {
228     expandNormalLoops(ML, Start, Dec);
229     ++NumNormalLoops;
230   }
231   else {
232     expandCTRLoops(ML, Start, Dec);
233     ++NumCTRLoops;
234   }
235   return true;
236 }
237 
238 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
239                                     MachineInstr *Dec) {
240   bool Is64Bit =
241       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
242 
243   MachineBasicBlock *Preheader = Start->getParent();
244   MachineBasicBlock *Exiting = Dec->getParent();
245   assert((Preheader && Exiting) &&
246          "Preheader and exiting should exist for CTR loop!");
247 
248   assert(Dec->getOperand(1).getImm() == 1 &&
249          "Loop decrement stride must be 1");
250 
251   unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
252   unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
253 
254   Register PHIDef =
255       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
256                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
257 
258   Start->getParent()->getParent()->getProperties().reset(
259       MachineFunctionProperties::Property::NoPHIs);
260 
261   // Generate "PHI" in the header block.
262   auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
263                         DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
264   PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
265 
266   Register ADDIDef =
267       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
268                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
269   // Generate "addi -1" in the exiting block.
270   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
271       .addReg(PHIDef)
272       .addImm(-1);
273 
274   // Add other inputs for the PHI node.
275   if (ML->isLoopLatch(Exiting)) {
276     // There must be only two predecessors for the loop header, one is the
277     // Preheader and the other one is loop latch Exiting. In hardware loop
278     // insertion pass, the block containing DecreaseCTRloop must dominate all
279     // loop latches. So there must be only one latch.
280     assert(ML->getHeader()->pred_size() == 2 &&
281            "Loop header predecessor is not right!");
282     PHIMIB.addReg(ADDIDef).addMBB(Exiting);
283   } else {
284     // If the block containing DecreaseCTRloop is not a loop latch, we can use
285     // ADDIDef as the value for all other blocks for the PHI. In hardware loop
286     // insertion pass, the block containing DecreaseCTRloop must dominate all
287     // loop latches.
288     for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
289       if (ML->contains(P)) {
290         assert(ML->isLoopLatch(P) &&
291                "Loop's header in-loop predecessor is not loop latch!");
292         PHIMIB.addReg(ADDIDef).addMBB(P);
293       } else
294         assert(P == Preheader &&
295                "CTR loop should not be generated for irreducible loop!");
296     }
297   }
298 
299   // Generate the compare in the exiting block.
300   Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
301   auto CMPMIB =
302       BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
303           .addReg(ADDIDef)
304           .addImm(0);
305 
306   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
307           Dec->getOperand(0).getReg())
308       .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
309 
310   // Remove the pseudo instructions.
311   Start->eraseFromParent();
312   Dec->eraseFromParent();
313 }
314 
315 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
316                                  MachineInstr *Dec) {
317   bool Is64Bit =
318       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
319 
320   MachineBasicBlock *Preheader = Start->getParent();
321   MachineBasicBlock *Exiting = Dec->getParent();
322 
323   (void)Preheader;
324   assert((Preheader && Exiting) &&
325          "Preheader and exiting should exist for CTR loop!");
326 
327   assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
328 
329   unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
330   unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
331   auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
332   assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
333          "There should be only one user for loop decrement pseudo!");
334 
335   unsigned Opcode = 0;
336   switch (BrInstr->getOpcode()) {
337   case PPC::BC:
338     Opcode = BDNZOpcode;
339     (void) ML;
340     assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
341            "Invalid ctr loop!");
342     break;
343   case PPC::BCn:
344     Opcode = BDZOpcode;
345     assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
346            "Invalid ctr loop!");
347     break;
348   default:
349     llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
350   }
351 
352   // Generate "bdnz/bdz" in the exiting block just before the terminator.
353   BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
354       .addMBB(BrInstr->getOperand(1).getMBB());
355 
356   // Remove the pseudo instructions.
357   BrInstr->eraseFromParent();
358   Dec->eraseFromParent();
359 }
360