1*d415bd75Srobert //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
9*d415bd75Srobert // This pass generates machine instructions for the CTR loops related pseudos:
10*d415bd75Srobert // 1: MTCTRloop/DecreaseCTRloop
11*d415bd75Srobert // 2: MTCTR8loop/DecreaseCTR8loop
12*d415bd75Srobert //
13*d415bd75Srobert // If a CTR loop can be generated:
14*d415bd75Srobert // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15*d415bd75Srobert // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16*d415bd75Srobert // its user branch instruction can be deleted.
17*d415bd75Srobert //
18*d415bd75Srobert // If a CTR loop can not be generated due to clobber of CTR:
19*d415bd75Srobert // 1: MTCTRloop/MTCTR8loop can be deleted.
20*d415bd75Srobert // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21*d415bd75Srobert // a "cmplwi/cmpldi".
22*d415bd75Srobert //
23*d415bd75Srobert // This pass runs just before register allocation, because we don't want
24*d415bd75Srobert // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25*d415bd75Srobert // generated or if a CTR loop can not be generated, we don't have any condition
26*d415bd75Srobert // register for the new added "cmplwi/cmpldi".
2709467b48Spatrick //
2809467b48Spatrick //===----------------------------------------------------------------------===//
2909467b48Spatrick
3073471bf0Spatrick #include "PPC.h"
31*d415bd75Srobert #include "PPCInstrInfo.h"
32*d415bd75Srobert #include "PPCSubtarget.h"
33*d415bd75Srobert #include "llvm/ADT/Statistic.h"
3473471bf0Spatrick #include "llvm/CodeGen/MachineBasicBlock.h"
3509467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
3609467b48Spatrick #include "llvm/CodeGen/MachineFunctionPass.h"
3773471bf0Spatrick #include "llvm/CodeGen/MachineInstr.h"
38*d415bd75Srobert #include "llvm/CodeGen/MachineLoopInfo.h"
3973471bf0Spatrick #include "llvm/CodeGen/MachineOperand.h"
40*d415bd75Srobert #include "llvm/CodeGen/MachineRegisterInfo.h"
4173471bf0Spatrick #include "llvm/CodeGen/Register.h"
4273471bf0Spatrick #include "llvm/InitializePasses.h"
4373471bf0Spatrick #include "llvm/Pass.h"
4473471bf0Spatrick #include "llvm/PassRegistry.h"
4573471bf0Spatrick #include "llvm/Support/CodeGen.h"
4673471bf0Spatrick #include "llvm/Support/Debug.h"
4773471bf0Spatrick #include "llvm/Support/ErrorHandling.h"
48*d415bd75Srobert #include <cassert>
4909467b48Spatrick
5009467b48Spatrick using namespace llvm;
5109467b48Spatrick
52*d415bd75Srobert #define DEBUG_TYPE "ppc-ctrloops"
53*d415bd75Srobert
54*d415bd75Srobert STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55*d415bd75Srobert STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
5609467b48Spatrick
5709467b48Spatrick namespace {
58*d415bd75Srobert class PPCCTRLoops : public MachineFunctionPass {
5909467b48Spatrick public:
6009467b48Spatrick static char ID;
6109467b48Spatrick
PPCCTRLoops()62*d415bd75Srobert PPCCTRLoops() : MachineFunctionPass(ID) {
63*d415bd75Srobert initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
6409467b48Spatrick }
6509467b48Spatrick
getAnalysisUsage(AnalysisUsage & AU) const6609467b48Spatrick void getAnalysisUsage(AnalysisUsage &AU) const override {
67*d415bd75Srobert AU.addRequired<MachineLoopInfo>();
6809467b48Spatrick MachineFunctionPass::getAnalysisUsage(AU);
6909467b48Spatrick }
7009467b48Spatrick
7109467b48Spatrick bool runOnMachineFunction(MachineFunction &MF) override;
7209467b48Spatrick
7309467b48Spatrick private:
74*d415bd75Srobert const PPCInstrInfo *TII = nullptr;
75*d415bd75Srobert MachineRegisterInfo *MRI = nullptr;
76*d415bd75Srobert
77*d415bd75Srobert bool processLoop(MachineLoop *ML);
78*d415bd75Srobert bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79*d415bd75Srobert void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80*d415bd75Srobert MachineInstr *Dec);
81*d415bd75Srobert void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82*d415bd75Srobert };
83*d415bd75Srobert } // namespace
84*d415bd75Srobert
85*d415bd75Srobert char PPCCTRLoops::ID = 0;
86*d415bd75Srobert
87*d415bd75Srobert INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88*d415bd75Srobert false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)89*d415bd75Srobert INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
90*d415bd75Srobert INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91*d415bd75Srobert false, false)
92*d415bd75Srobert
93*d415bd75Srobert FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94*d415bd75Srobert
runOnMachineFunction(MachineFunction & MF)95*d415bd75Srobert bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96*d415bd75Srobert bool Changed = false;
97*d415bd75Srobert
98*d415bd75Srobert auto &MLI = getAnalysis<MachineLoopInfo>();
99*d415bd75Srobert TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100*d415bd75Srobert MRI = &MF.getRegInfo();
101*d415bd75Srobert
102*d415bd75Srobert for (auto *ML : MLI) {
103*d415bd75Srobert if (ML->isOutermost())
104*d415bd75Srobert Changed |= processLoop(ML);
105*d415bd75Srobert }
106*d415bd75Srobert
107*d415bd75Srobert #ifndef NDEBUG
108*d415bd75Srobert for (const MachineBasicBlock &BB : MF) {
109*d415bd75Srobert for (const MachineInstr &I : BB)
110*d415bd75Srobert assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111*d415bd75Srobert I.getOpcode() != PPC::DecreaseCTR8loop) &&
112*d415bd75Srobert "CTR loop pseudo is not expanded!");
113*d415bd75Srobert }
114*d415bd75Srobert #endif
115*d415bd75Srobert
116*d415bd75Srobert return Changed;
117*d415bd75Srobert }
118*d415bd75Srobert
isCTRClobber(MachineInstr * MI,bool CheckReads) const119*d415bd75Srobert bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120*d415bd75Srobert if (!CheckReads) {
121*d415bd75Srobert // If we are only checking for defs, that is we are going to find
122*d415bd75Srobert // definitions before MTCTRloop, for this case:
123*d415bd75Srobert // CTR defination inside the callee of a call instruction will not impact
124*d415bd75Srobert // the defination of MTCTRloop, so we can use definesRegister() for the
125*d415bd75Srobert // check, no need to check the regmask.
126*d415bd75Srobert return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
127*d415bd75Srobert }
128*d415bd75Srobert
129*d415bd75Srobert if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
130*d415bd75Srobert return true;
131*d415bd75Srobert
132*d415bd75Srobert if (MI->getDesc().isCall())
133*d415bd75Srobert return true;
134*d415bd75Srobert
135*d415bd75Srobert // We define the CTR in the loop preheader, so if there is any CTR reader in
136*d415bd75Srobert // the loop, we also can not use CTR loop form.
137*d415bd75Srobert if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
138*d415bd75Srobert return true;
139*d415bd75Srobert
140*d415bd75Srobert return false;
141*d415bd75Srobert }
142*d415bd75Srobert
processLoop(MachineLoop * ML)143*d415bd75Srobert bool PPCCTRLoops::processLoop(MachineLoop *ML) {
144*d415bd75Srobert bool Changed = false;
145*d415bd75Srobert
146*d415bd75Srobert // Align with HardwareLoop pass, process inner loops first.
147*d415bd75Srobert for (MachineLoop *I : *ML)
148*d415bd75Srobert Changed |= processLoop(I);
149*d415bd75Srobert
150*d415bd75Srobert // If any inner loop is changed, outter loop must be without hardware loop
151*d415bd75Srobert // intrinsics.
152*d415bd75Srobert if (Changed)
153*d415bd75Srobert return true;
154*d415bd75Srobert
155*d415bd75Srobert auto IsLoopStart = [](MachineInstr &MI) {
156*d415bd75Srobert return MI.getOpcode() == PPC::MTCTRloop ||
157*d415bd75Srobert MI.getOpcode() == PPC::MTCTR8loop;
15809467b48Spatrick };
15909467b48Spatrick
160*d415bd75Srobert auto SearchForStart =
161*d415bd75Srobert [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
162*d415bd75Srobert for (auto &MI : *MBB) {
163*d415bd75Srobert if (IsLoopStart(MI))
164*d415bd75Srobert return &MI;
16509467b48Spatrick }
166*d415bd75Srobert return nullptr;
167*d415bd75Srobert };
16809467b48Spatrick
169*d415bd75Srobert MachineInstr *Start = nullptr;
170*d415bd75Srobert MachineInstr *Dec = nullptr;
171*d415bd75Srobert bool InvalidCTRLoop = false;
17209467b48Spatrick
173*d415bd75Srobert MachineBasicBlock *Preheader = ML->getLoopPreheader();
174*d415bd75Srobert // If there is no preheader for this loop, there must be no MTCTRloop
175*d415bd75Srobert // either.
176*d415bd75Srobert if (!Preheader)
17709467b48Spatrick return false;
178*d415bd75Srobert
179*d415bd75Srobert Start = SearchForStart(Preheader);
180*d415bd75Srobert // This is not a CTR loop candidate.
181*d415bd75Srobert if (!Start)
182*d415bd75Srobert return false;
183*d415bd75Srobert
184*d415bd75Srobert // If CTR is live to the preheader, we can not redefine the CTR register.
185*d415bd75Srobert if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
186*d415bd75Srobert InvalidCTRLoop = true;
187*d415bd75Srobert
188*d415bd75Srobert // Make sure there is also no CTR clobber in the block preheader between the
189*d415bd75Srobert // begin and MTCTR.
190*d415bd75Srobert for (MachineBasicBlock::reverse_instr_iterator I =
191*d415bd75Srobert std::next(Start->getReverseIterator());
192*d415bd75Srobert I != Preheader->instr_rend(); ++I)
193*d415bd75Srobert // Only check the definitions of CTR. If there is non-dead definition for
194*d415bd75Srobert // the CTR, we conservatively don't generate a CTR loop.
195*d415bd75Srobert if (isCTRClobber(&*I, /* CheckReads */ false)) {
196*d415bd75Srobert InvalidCTRLoop = true;
197*d415bd75Srobert break;
19809467b48Spatrick }
19909467b48Spatrick
200*d415bd75Srobert // Make sure there is also no CTR clobber/user in the block preheader between
201*d415bd75Srobert // MTCTR and the end.
202*d415bd75Srobert for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
203*d415bd75Srobert I != Preheader->instr_end(); ++I)
204*d415bd75Srobert if (isCTRClobber(&*I, /* CheckReads */ true)) {
205*d415bd75Srobert InvalidCTRLoop = true;
206*d415bd75Srobert break;
207*d415bd75Srobert }
20809467b48Spatrick
209*d415bd75Srobert // Find the CTR loop components and decide whether or not to fall back to a
210*d415bd75Srobert // normal loop.
211*d415bd75Srobert for (auto *MBB : reverse(ML->getBlocks())) {
212*d415bd75Srobert for (auto &MI : *MBB) {
213*d415bd75Srobert if (MI.getOpcode() == PPC::DecreaseCTRloop ||
214*d415bd75Srobert MI.getOpcode() == PPC::DecreaseCTR8loop)
215*d415bd75Srobert Dec = &MI;
216*d415bd75Srobert else if (!InvalidCTRLoop)
217*d415bd75Srobert // If any instruction clobber CTR, then we can not generate a CTR loop.
218*d415bd75Srobert InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
219*d415bd75Srobert }
220*d415bd75Srobert if (Dec && InvalidCTRLoop)
221*d415bd75Srobert break;
222*d415bd75Srobert }
223*d415bd75Srobert
224*d415bd75Srobert assert(Dec && "CTR loop is not complete!");
225*d415bd75Srobert
226*d415bd75Srobert if (InvalidCTRLoop) {
227*d415bd75Srobert expandNormalLoops(ML, Start, Dec);
228*d415bd75Srobert ++NumNormalLoops;
229*d415bd75Srobert }
230*d415bd75Srobert else {
231*d415bd75Srobert expandCTRLoops(ML, Start, Dec);
232*d415bd75Srobert ++NumCTRLoops;
233*d415bd75Srobert }
234*d415bd75Srobert return true;
235*d415bd75Srobert }
236*d415bd75Srobert
expandNormalLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)237*d415bd75Srobert void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
238*d415bd75Srobert MachineInstr *Dec) {
239*d415bd75Srobert bool Is64Bit =
240*d415bd75Srobert Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
241*d415bd75Srobert
242*d415bd75Srobert MachineBasicBlock *Preheader = Start->getParent();
243*d415bd75Srobert MachineBasicBlock *Exiting = Dec->getParent();
244*d415bd75Srobert assert((Preheader && Exiting) &&
245*d415bd75Srobert "Preheader and exiting should exist for CTR loop!");
246*d415bd75Srobert
247*d415bd75Srobert assert(Dec->getOperand(1).getImm() == 1 &&
248*d415bd75Srobert "Loop decrement stride must be 1");
249*d415bd75Srobert
250*d415bd75Srobert unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
251*d415bd75Srobert unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
252*d415bd75Srobert
253*d415bd75Srobert Register PHIDef =
254*d415bd75Srobert MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
255*d415bd75Srobert : &PPC::GPRC_and_GPRC_NOR0RegClass);
256*d415bd75Srobert
257*d415bd75Srobert Start->getParent()->getParent()->getProperties().reset(
258*d415bd75Srobert MachineFunctionProperties::Property::NoPHIs);
259*d415bd75Srobert
260*d415bd75Srobert // Generate "PHI" in the header block.
261*d415bd75Srobert auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
262*d415bd75Srobert DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
263*d415bd75Srobert PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
264*d415bd75Srobert
265*d415bd75Srobert Register ADDIDef =
266*d415bd75Srobert MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
267*d415bd75Srobert : &PPC::GPRC_and_GPRC_NOR0RegClass);
268*d415bd75Srobert // Generate "addi -1" in the exiting block.
269*d415bd75Srobert BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
270*d415bd75Srobert .addReg(PHIDef)
271*d415bd75Srobert .addImm(-1);
272*d415bd75Srobert
273*d415bd75Srobert // Add other inputs for the PHI node.
274*d415bd75Srobert if (ML->isLoopLatch(Exiting)) {
275*d415bd75Srobert // There must be only two predecessors for the loop header, one is the
276*d415bd75Srobert // Preheader and the other one is loop latch Exiting. In hardware loop
277*d415bd75Srobert // insertion pass, the block containing DecreaseCTRloop must dominate all
278*d415bd75Srobert // loop latches. So there must be only one latch.
279*d415bd75Srobert assert(ML->getHeader()->pred_size() == 2 &&
280*d415bd75Srobert "Loop header predecessor is not right!");
281*d415bd75Srobert PHIMIB.addReg(ADDIDef).addMBB(Exiting);
282*d415bd75Srobert } else {
283*d415bd75Srobert // If the block containing DecreaseCTRloop is not a loop latch, we can use
284*d415bd75Srobert // ADDIDef as the value for all other blocks for the PHI. In hardware loop
285*d415bd75Srobert // insertion pass, the block containing DecreaseCTRloop must dominate all
286*d415bd75Srobert // loop latches.
287*d415bd75Srobert for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
288*d415bd75Srobert if (ML->contains(P)) {
289*d415bd75Srobert assert(ML->isLoopLatch(P) &&
290*d415bd75Srobert "Loop's header in-loop predecessor is not loop latch!");
291*d415bd75Srobert PHIMIB.addReg(ADDIDef).addMBB(P);
29209467b48Spatrick } else
293*d415bd75Srobert assert(P == Preheader &&
294*d415bd75Srobert "CTR loop should not be generated for irreducible loop!");
295*d415bd75Srobert }
296*d415bd75Srobert }
29709467b48Spatrick
298*d415bd75Srobert // Generate the compare in the exiting block.
299*d415bd75Srobert Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
300*d415bd75Srobert auto CMPMIB =
301*d415bd75Srobert BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
302*d415bd75Srobert .addReg(ADDIDef)
303*d415bd75Srobert .addImm(0);
30409467b48Spatrick
305*d415bd75Srobert BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
306*d415bd75Srobert Dec->getOperand(0).getReg())
307*d415bd75Srobert .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
308*d415bd75Srobert
309*d415bd75Srobert // Remove the pseudo instructions.
310*d415bd75Srobert Start->eraseFromParent();
311*d415bd75Srobert Dec->eraseFromParent();
312*d415bd75Srobert }
313*d415bd75Srobert
expandCTRLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)314*d415bd75Srobert void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
315*d415bd75Srobert MachineInstr *Dec) {
316*d415bd75Srobert bool Is64Bit =
317*d415bd75Srobert Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
318*d415bd75Srobert
319*d415bd75Srobert MachineBasicBlock *Preheader = Start->getParent();
320*d415bd75Srobert MachineBasicBlock *Exiting = Dec->getParent();
321*d415bd75Srobert
322*d415bd75Srobert (void)Preheader;
323*d415bd75Srobert assert((Preheader && Exiting) &&
324*d415bd75Srobert "Preheader and exiting should exist for CTR loop!");
325*d415bd75Srobert
326*d415bd75Srobert assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
327*d415bd75Srobert
328*d415bd75Srobert unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
329*d415bd75Srobert unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
330*d415bd75Srobert auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
331*d415bd75Srobert assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
332*d415bd75Srobert "There should be only one user for loop decrement pseudo!");
333*d415bd75Srobert
334*d415bd75Srobert unsigned Opcode = 0;
335*d415bd75Srobert switch (BrInstr->getOpcode()) {
336*d415bd75Srobert case PPC::BC:
337*d415bd75Srobert Opcode = BDNZOpcode;
338*d415bd75Srobert (void) ML;
339*d415bd75Srobert assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
340*d415bd75Srobert "Invalid ctr loop!");
34109467b48Spatrick break;
342*d415bd75Srobert case PPC::BCn:
343*d415bd75Srobert Opcode = BDZOpcode;
344*d415bd75Srobert assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
345*d415bd75Srobert "Invalid ctr loop!");
34609467b48Spatrick break;
347*d415bd75Srobert default:
348*d415bd75Srobert llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
34909467b48Spatrick }
35009467b48Spatrick
351*d415bd75Srobert // Generate "bdnz/bdz" in the exiting block just before the terminator.
352*d415bd75Srobert BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
353*d415bd75Srobert .addMBB(BrInstr->getOperand(1).getMBB());
35409467b48Spatrick
355*d415bd75Srobert // Remove the pseudo instructions.
356*d415bd75Srobert BrInstr->eraseFromParent();
357*d415bd75Srobert Dec->eraseFromParent();
35809467b48Spatrick }
359