xref: /openbsd-src/gnu/llvm/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
1*d415bd75Srobert //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
9*d415bd75Srobert // This pass generates machine instructions for the CTR loops related pseudos:
10*d415bd75Srobert // 1: MTCTRloop/DecreaseCTRloop
11*d415bd75Srobert // 2: MTCTR8loop/DecreaseCTR8loop
12*d415bd75Srobert //
13*d415bd75Srobert // If a CTR loop can be generated:
14*d415bd75Srobert // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15*d415bd75Srobert // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16*d415bd75Srobert //    its user branch instruction can be deleted.
17*d415bd75Srobert //
18*d415bd75Srobert // If a CTR loop can not be generated due to clobber of CTR:
19*d415bd75Srobert // 1: MTCTRloop/MTCTR8loop can be deleted.
20*d415bd75Srobert // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21*d415bd75Srobert //    a "cmplwi/cmpldi".
22*d415bd75Srobert //
23*d415bd75Srobert // This pass runs just before register allocation, because we don't want
24*d415bd75Srobert // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25*d415bd75Srobert // generated or if a CTR loop can not be generated, we don't have any condition
26*d415bd75Srobert // register for the new added "cmplwi/cmpldi".
2709467b48Spatrick //
2809467b48Spatrick //===----------------------------------------------------------------------===//
2909467b48Spatrick 
3073471bf0Spatrick #include "PPC.h"
31*d415bd75Srobert #include "PPCInstrInfo.h"
32*d415bd75Srobert #include "PPCSubtarget.h"
33*d415bd75Srobert #include "llvm/ADT/Statistic.h"
3473471bf0Spatrick #include "llvm/CodeGen/MachineBasicBlock.h"
3509467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
3609467b48Spatrick #include "llvm/CodeGen/MachineFunctionPass.h"
3773471bf0Spatrick #include "llvm/CodeGen/MachineInstr.h"
38*d415bd75Srobert #include "llvm/CodeGen/MachineLoopInfo.h"
3973471bf0Spatrick #include "llvm/CodeGen/MachineOperand.h"
40*d415bd75Srobert #include "llvm/CodeGen/MachineRegisterInfo.h"
4173471bf0Spatrick #include "llvm/CodeGen/Register.h"
4273471bf0Spatrick #include "llvm/InitializePasses.h"
4373471bf0Spatrick #include "llvm/Pass.h"
4473471bf0Spatrick #include "llvm/PassRegistry.h"
4573471bf0Spatrick #include "llvm/Support/CodeGen.h"
4673471bf0Spatrick #include "llvm/Support/Debug.h"
4773471bf0Spatrick #include "llvm/Support/ErrorHandling.h"
48*d415bd75Srobert #include <cassert>
4909467b48Spatrick 
5009467b48Spatrick using namespace llvm;
5109467b48Spatrick 
52*d415bd75Srobert #define DEBUG_TYPE "ppc-ctrloops"
53*d415bd75Srobert 
54*d415bd75Srobert STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55*d415bd75Srobert STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
5609467b48Spatrick 
5709467b48Spatrick namespace {
58*d415bd75Srobert class PPCCTRLoops : public MachineFunctionPass {
5909467b48Spatrick public:
6009467b48Spatrick   static char ID;
6109467b48Spatrick 
PPCCTRLoops()62*d415bd75Srobert   PPCCTRLoops() : MachineFunctionPass(ID) {
63*d415bd75Srobert     initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
6409467b48Spatrick   }
6509467b48Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const6609467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
67*d415bd75Srobert     AU.addRequired<MachineLoopInfo>();
6809467b48Spatrick     MachineFunctionPass::getAnalysisUsage(AU);
6909467b48Spatrick   }
7009467b48Spatrick 
7109467b48Spatrick   bool runOnMachineFunction(MachineFunction &MF) override;
7209467b48Spatrick 
7309467b48Spatrick private:
74*d415bd75Srobert   const PPCInstrInfo *TII = nullptr;
75*d415bd75Srobert   MachineRegisterInfo *MRI = nullptr;
76*d415bd75Srobert 
77*d415bd75Srobert   bool processLoop(MachineLoop *ML);
78*d415bd75Srobert   bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79*d415bd75Srobert   void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80*d415bd75Srobert                          MachineInstr *Dec);
81*d415bd75Srobert   void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82*d415bd75Srobert };
83*d415bd75Srobert } // namespace
84*d415bd75Srobert 
85*d415bd75Srobert char PPCCTRLoops::ID = 0;
86*d415bd75Srobert 
87*d415bd75Srobert INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88*d415bd75Srobert                       false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)89*d415bd75Srobert INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
90*d415bd75Srobert INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91*d415bd75Srobert                     false, false)
92*d415bd75Srobert 
93*d415bd75Srobert FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94*d415bd75Srobert 
runOnMachineFunction(MachineFunction & MF)95*d415bd75Srobert bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96*d415bd75Srobert   bool Changed = false;
97*d415bd75Srobert 
98*d415bd75Srobert   auto &MLI = getAnalysis<MachineLoopInfo>();
99*d415bd75Srobert   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100*d415bd75Srobert   MRI = &MF.getRegInfo();
101*d415bd75Srobert 
102*d415bd75Srobert   for (auto *ML : MLI) {
103*d415bd75Srobert     if (ML->isOutermost())
104*d415bd75Srobert       Changed |= processLoop(ML);
105*d415bd75Srobert   }
106*d415bd75Srobert 
107*d415bd75Srobert #ifndef NDEBUG
108*d415bd75Srobert   for (const MachineBasicBlock &BB : MF) {
109*d415bd75Srobert     for (const MachineInstr &I : BB)
110*d415bd75Srobert       assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111*d415bd75Srobert               I.getOpcode() != PPC::DecreaseCTR8loop) &&
112*d415bd75Srobert              "CTR loop pseudo is not expanded!");
113*d415bd75Srobert   }
114*d415bd75Srobert #endif
115*d415bd75Srobert 
116*d415bd75Srobert   return Changed;
117*d415bd75Srobert }
118*d415bd75Srobert 
isCTRClobber(MachineInstr * MI,bool CheckReads) const119*d415bd75Srobert bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120*d415bd75Srobert   if (!CheckReads) {
121*d415bd75Srobert     // If we are only checking for defs, that is we are going to find
122*d415bd75Srobert     // definitions before MTCTRloop, for this case:
123*d415bd75Srobert     // CTR defination inside the callee of a call instruction will not impact
124*d415bd75Srobert     // the defination of MTCTRloop, so we can use definesRegister() for the
125*d415bd75Srobert     // check, no need to check the regmask.
126*d415bd75Srobert     return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
127*d415bd75Srobert   }
128*d415bd75Srobert 
129*d415bd75Srobert   if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
130*d415bd75Srobert     return true;
131*d415bd75Srobert 
132*d415bd75Srobert   if (MI->getDesc().isCall())
133*d415bd75Srobert     return true;
134*d415bd75Srobert 
135*d415bd75Srobert   // We define the CTR in the loop preheader, so if there is any CTR reader in
136*d415bd75Srobert   // the loop, we also can not use CTR loop form.
137*d415bd75Srobert   if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
138*d415bd75Srobert     return true;
139*d415bd75Srobert 
140*d415bd75Srobert   return false;
141*d415bd75Srobert }
142*d415bd75Srobert 
processLoop(MachineLoop * ML)143*d415bd75Srobert bool PPCCTRLoops::processLoop(MachineLoop *ML) {
144*d415bd75Srobert   bool Changed = false;
145*d415bd75Srobert 
146*d415bd75Srobert   // Align with HardwareLoop pass, process inner loops first.
147*d415bd75Srobert   for (MachineLoop *I : *ML)
148*d415bd75Srobert     Changed |= processLoop(I);
149*d415bd75Srobert 
150*d415bd75Srobert   // If any inner loop is changed, outter loop must be without hardware loop
151*d415bd75Srobert   // intrinsics.
152*d415bd75Srobert   if (Changed)
153*d415bd75Srobert     return true;
154*d415bd75Srobert 
155*d415bd75Srobert   auto IsLoopStart = [](MachineInstr &MI) {
156*d415bd75Srobert     return MI.getOpcode() == PPC::MTCTRloop ||
157*d415bd75Srobert            MI.getOpcode() == PPC::MTCTR8loop;
15809467b48Spatrick   };
15909467b48Spatrick 
160*d415bd75Srobert   auto SearchForStart =
161*d415bd75Srobert       [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
162*d415bd75Srobert     for (auto &MI : *MBB) {
163*d415bd75Srobert       if (IsLoopStart(MI))
164*d415bd75Srobert         return &MI;
16509467b48Spatrick     }
166*d415bd75Srobert     return nullptr;
167*d415bd75Srobert   };
16809467b48Spatrick 
169*d415bd75Srobert   MachineInstr *Start = nullptr;
170*d415bd75Srobert   MachineInstr *Dec = nullptr;
171*d415bd75Srobert   bool InvalidCTRLoop = false;
17209467b48Spatrick 
173*d415bd75Srobert   MachineBasicBlock *Preheader = ML->getLoopPreheader();
174*d415bd75Srobert   // If there is no preheader for this loop, there must be no MTCTRloop
175*d415bd75Srobert   // either.
176*d415bd75Srobert   if (!Preheader)
17709467b48Spatrick     return false;
178*d415bd75Srobert 
179*d415bd75Srobert   Start = SearchForStart(Preheader);
180*d415bd75Srobert   // This is not a CTR loop candidate.
181*d415bd75Srobert   if (!Start)
182*d415bd75Srobert     return false;
183*d415bd75Srobert 
184*d415bd75Srobert   // If CTR is live to the preheader, we can not redefine the CTR register.
185*d415bd75Srobert   if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
186*d415bd75Srobert     InvalidCTRLoop = true;
187*d415bd75Srobert 
188*d415bd75Srobert   // Make sure there is also no CTR clobber in the block preheader between the
189*d415bd75Srobert   // begin and MTCTR.
190*d415bd75Srobert   for (MachineBasicBlock::reverse_instr_iterator I =
191*d415bd75Srobert            std::next(Start->getReverseIterator());
192*d415bd75Srobert        I != Preheader->instr_rend(); ++I)
193*d415bd75Srobert     // Only check the definitions of CTR. If there is non-dead definition for
194*d415bd75Srobert     // the CTR, we conservatively don't generate a CTR loop.
195*d415bd75Srobert     if (isCTRClobber(&*I, /* CheckReads */ false)) {
196*d415bd75Srobert       InvalidCTRLoop = true;
197*d415bd75Srobert       break;
19809467b48Spatrick     }
19909467b48Spatrick 
200*d415bd75Srobert   // Make sure there is also no CTR clobber/user in the block preheader between
201*d415bd75Srobert   // MTCTR and the end.
202*d415bd75Srobert   for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
203*d415bd75Srobert        I != Preheader->instr_end(); ++I)
204*d415bd75Srobert     if (isCTRClobber(&*I, /* CheckReads */ true)) {
205*d415bd75Srobert       InvalidCTRLoop = true;
206*d415bd75Srobert       break;
207*d415bd75Srobert     }
20809467b48Spatrick 
209*d415bd75Srobert   // Find the CTR loop components and decide whether or not to fall back to a
210*d415bd75Srobert   // normal loop.
211*d415bd75Srobert   for (auto *MBB : reverse(ML->getBlocks())) {
212*d415bd75Srobert     for (auto &MI : *MBB) {
213*d415bd75Srobert       if (MI.getOpcode() == PPC::DecreaseCTRloop ||
214*d415bd75Srobert           MI.getOpcode() == PPC::DecreaseCTR8loop)
215*d415bd75Srobert         Dec = &MI;
216*d415bd75Srobert       else if (!InvalidCTRLoop)
217*d415bd75Srobert         // If any instruction clobber CTR, then we can not generate a CTR loop.
218*d415bd75Srobert         InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
219*d415bd75Srobert     }
220*d415bd75Srobert     if (Dec && InvalidCTRLoop)
221*d415bd75Srobert       break;
222*d415bd75Srobert   }
223*d415bd75Srobert 
224*d415bd75Srobert   assert(Dec && "CTR loop is not complete!");
225*d415bd75Srobert 
226*d415bd75Srobert   if (InvalidCTRLoop) {
227*d415bd75Srobert     expandNormalLoops(ML, Start, Dec);
228*d415bd75Srobert     ++NumNormalLoops;
229*d415bd75Srobert   }
230*d415bd75Srobert   else {
231*d415bd75Srobert     expandCTRLoops(ML, Start, Dec);
232*d415bd75Srobert     ++NumCTRLoops;
233*d415bd75Srobert   }
234*d415bd75Srobert   return true;
235*d415bd75Srobert }
236*d415bd75Srobert 
expandNormalLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)237*d415bd75Srobert void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
238*d415bd75Srobert                                     MachineInstr *Dec) {
239*d415bd75Srobert   bool Is64Bit =
240*d415bd75Srobert       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
241*d415bd75Srobert 
242*d415bd75Srobert   MachineBasicBlock *Preheader = Start->getParent();
243*d415bd75Srobert   MachineBasicBlock *Exiting = Dec->getParent();
244*d415bd75Srobert   assert((Preheader && Exiting) &&
245*d415bd75Srobert          "Preheader and exiting should exist for CTR loop!");
246*d415bd75Srobert 
247*d415bd75Srobert   assert(Dec->getOperand(1).getImm() == 1 &&
248*d415bd75Srobert          "Loop decrement stride must be 1");
249*d415bd75Srobert 
250*d415bd75Srobert   unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
251*d415bd75Srobert   unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
252*d415bd75Srobert 
253*d415bd75Srobert   Register PHIDef =
254*d415bd75Srobert       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
255*d415bd75Srobert                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
256*d415bd75Srobert 
257*d415bd75Srobert   Start->getParent()->getParent()->getProperties().reset(
258*d415bd75Srobert       MachineFunctionProperties::Property::NoPHIs);
259*d415bd75Srobert 
260*d415bd75Srobert   // Generate "PHI" in the header block.
261*d415bd75Srobert   auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
262*d415bd75Srobert                         DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
263*d415bd75Srobert   PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
264*d415bd75Srobert 
265*d415bd75Srobert   Register ADDIDef =
266*d415bd75Srobert       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
267*d415bd75Srobert                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
268*d415bd75Srobert   // Generate "addi -1" in the exiting block.
269*d415bd75Srobert   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
270*d415bd75Srobert       .addReg(PHIDef)
271*d415bd75Srobert       .addImm(-1);
272*d415bd75Srobert 
273*d415bd75Srobert   // Add other inputs for the PHI node.
274*d415bd75Srobert   if (ML->isLoopLatch(Exiting)) {
275*d415bd75Srobert     // There must be only two predecessors for the loop header, one is the
276*d415bd75Srobert     // Preheader and the other one is loop latch Exiting. In hardware loop
277*d415bd75Srobert     // insertion pass, the block containing DecreaseCTRloop must dominate all
278*d415bd75Srobert     // loop latches. So there must be only one latch.
279*d415bd75Srobert     assert(ML->getHeader()->pred_size() == 2 &&
280*d415bd75Srobert            "Loop header predecessor is not right!");
281*d415bd75Srobert     PHIMIB.addReg(ADDIDef).addMBB(Exiting);
282*d415bd75Srobert   } else {
283*d415bd75Srobert     // If the block containing DecreaseCTRloop is not a loop latch, we can use
284*d415bd75Srobert     // ADDIDef as the value for all other blocks for the PHI. In hardware loop
285*d415bd75Srobert     // insertion pass, the block containing DecreaseCTRloop must dominate all
286*d415bd75Srobert     // loop latches.
287*d415bd75Srobert     for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
288*d415bd75Srobert       if (ML->contains(P)) {
289*d415bd75Srobert         assert(ML->isLoopLatch(P) &&
290*d415bd75Srobert                "Loop's header in-loop predecessor is not loop latch!");
291*d415bd75Srobert         PHIMIB.addReg(ADDIDef).addMBB(P);
29209467b48Spatrick       } else
293*d415bd75Srobert         assert(P == Preheader &&
294*d415bd75Srobert                "CTR loop should not be generated for irreducible loop!");
295*d415bd75Srobert     }
296*d415bd75Srobert   }
29709467b48Spatrick 
298*d415bd75Srobert   // Generate the compare in the exiting block.
299*d415bd75Srobert   Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
300*d415bd75Srobert   auto CMPMIB =
301*d415bd75Srobert       BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
302*d415bd75Srobert           .addReg(ADDIDef)
303*d415bd75Srobert           .addImm(0);
30409467b48Spatrick 
305*d415bd75Srobert   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
306*d415bd75Srobert           Dec->getOperand(0).getReg())
307*d415bd75Srobert       .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
308*d415bd75Srobert 
309*d415bd75Srobert   // Remove the pseudo instructions.
310*d415bd75Srobert   Start->eraseFromParent();
311*d415bd75Srobert   Dec->eraseFromParent();
312*d415bd75Srobert }
313*d415bd75Srobert 
expandCTRLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)314*d415bd75Srobert void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
315*d415bd75Srobert                                  MachineInstr *Dec) {
316*d415bd75Srobert   bool Is64Bit =
317*d415bd75Srobert       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
318*d415bd75Srobert 
319*d415bd75Srobert   MachineBasicBlock *Preheader = Start->getParent();
320*d415bd75Srobert   MachineBasicBlock *Exiting = Dec->getParent();
321*d415bd75Srobert 
322*d415bd75Srobert   (void)Preheader;
323*d415bd75Srobert   assert((Preheader && Exiting) &&
324*d415bd75Srobert          "Preheader and exiting should exist for CTR loop!");
325*d415bd75Srobert 
326*d415bd75Srobert   assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
327*d415bd75Srobert 
328*d415bd75Srobert   unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
329*d415bd75Srobert   unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
330*d415bd75Srobert   auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
331*d415bd75Srobert   assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
332*d415bd75Srobert          "There should be only one user for loop decrement pseudo!");
333*d415bd75Srobert 
334*d415bd75Srobert   unsigned Opcode = 0;
335*d415bd75Srobert   switch (BrInstr->getOpcode()) {
336*d415bd75Srobert   case PPC::BC:
337*d415bd75Srobert     Opcode = BDNZOpcode;
338*d415bd75Srobert     (void) ML;
339*d415bd75Srobert     assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
340*d415bd75Srobert            "Invalid ctr loop!");
34109467b48Spatrick     break;
342*d415bd75Srobert   case PPC::BCn:
343*d415bd75Srobert     Opcode = BDZOpcode;
344*d415bd75Srobert     assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
345*d415bd75Srobert            "Invalid ctr loop!");
34609467b48Spatrick     break;
347*d415bd75Srobert   default:
348*d415bd75Srobert     llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
34909467b48Spatrick   }
35009467b48Spatrick 
351*d415bd75Srobert   // Generate "bdnz/bdz" in the exiting block just before the terminator.
352*d415bd75Srobert   BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
353*d415bd75Srobert       .addMBB(BrInstr->getOperand(1).getMBB());
35409467b48Spatrick 
355*d415bd75Srobert   // Remove the pseudo instructions.
356*d415bd75Srobert   BrInstr->eraseFromParent();
357*d415bd75Srobert   Dec->eraseFromParent();
35809467b48Spatrick }
359