xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/X86/X86PadShortFunction.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
17330f729Sjoerg //===-------- X86PadShortFunction.cpp - pad short functions -----------===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg // This file defines the pass which will pad short functions to prevent
107330f729Sjoerg // a stall if a function returns before the return address is ready. This
117330f729Sjoerg // is needed for some Intel Atom processors.
127330f729Sjoerg //
137330f729Sjoerg //===----------------------------------------------------------------------===//
147330f729Sjoerg 
157330f729Sjoerg 
167330f729Sjoerg #include "X86.h"
177330f729Sjoerg #include "X86InstrInfo.h"
187330f729Sjoerg #include "X86Subtarget.h"
197330f729Sjoerg #include "llvm/ADT/Statistic.h"
20*82d56013Sjoerg #include "llvm/Analysis/ProfileSummaryInfo.h"
21*82d56013Sjoerg #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
227330f729Sjoerg #include "llvm/CodeGen/MachineFunctionPass.h"
237330f729Sjoerg #include "llvm/CodeGen/MachineInstrBuilder.h"
24*82d56013Sjoerg #include "llvm/CodeGen/MachineSizeOpts.h"
257330f729Sjoerg #include "llvm/CodeGen/Passes.h"
267330f729Sjoerg #include "llvm/CodeGen/TargetSchedule.h"
277330f729Sjoerg #include "llvm/IR/Function.h"
287330f729Sjoerg #include "llvm/Support/Debug.h"
297330f729Sjoerg #include "llvm/Support/raw_ostream.h"
307330f729Sjoerg 
317330f729Sjoerg using namespace llvm;
327330f729Sjoerg 
337330f729Sjoerg #define DEBUG_TYPE "x86-pad-short-functions"
347330f729Sjoerg 
357330f729Sjoerg STATISTIC(NumBBsPadded, "Number of basic blocks padded");
367330f729Sjoerg 
377330f729Sjoerg namespace {
387330f729Sjoerg   struct VisitedBBInfo {
397330f729Sjoerg     // HasReturn - Whether the BB contains a return instruction
407330f729Sjoerg     bool HasReturn;
417330f729Sjoerg 
427330f729Sjoerg     // Cycles - Number of cycles until return if HasReturn is true, otherwise
437330f729Sjoerg     // number of cycles until end of the BB
447330f729Sjoerg     unsigned int Cycles;
457330f729Sjoerg 
VisitedBBInfo__anon0199e2f70111::VisitedBBInfo467330f729Sjoerg     VisitedBBInfo() : HasReturn(false), Cycles(0) {}
VisitedBBInfo__anon0199e2f70111::VisitedBBInfo477330f729Sjoerg     VisitedBBInfo(bool HasReturn, unsigned int Cycles)
487330f729Sjoerg       : HasReturn(HasReturn), Cycles(Cycles) {}
497330f729Sjoerg   };
507330f729Sjoerg 
517330f729Sjoerg   struct PadShortFunc : public MachineFunctionPass {
527330f729Sjoerg     static char ID;
PadShortFunc__anon0199e2f70111::PadShortFunc537330f729Sjoerg     PadShortFunc() : MachineFunctionPass(ID)
547330f729Sjoerg                    , Threshold(4) {}
557330f729Sjoerg 
567330f729Sjoerg     bool runOnMachineFunction(MachineFunction &MF) override;
577330f729Sjoerg 
getAnalysisUsage__anon0199e2f70111::PadShortFunc58*82d56013Sjoerg     void getAnalysisUsage(AnalysisUsage &AU) const override {
59*82d56013Sjoerg       AU.addRequired<ProfileSummaryInfoWrapperPass>();
60*82d56013Sjoerg       AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
61*82d56013Sjoerg       AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
62*82d56013Sjoerg       MachineFunctionPass::getAnalysisUsage(AU);
63*82d56013Sjoerg     }
64*82d56013Sjoerg 
getRequiredProperties__anon0199e2f70111::PadShortFunc657330f729Sjoerg     MachineFunctionProperties getRequiredProperties() const override {
667330f729Sjoerg       return MachineFunctionProperties().set(
677330f729Sjoerg           MachineFunctionProperties::Property::NoVRegs);
687330f729Sjoerg     }
697330f729Sjoerg 
getPassName__anon0199e2f70111::PadShortFunc707330f729Sjoerg     StringRef getPassName() const override {
717330f729Sjoerg       return "X86 Atom pad short functions";
727330f729Sjoerg     }
737330f729Sjoerg 
747330f729Sjoerg   private:
757330f729Sjoerg     void findReturns(MachineBasicBlock *MBB,
767330f729Sjoerg                      unsigned int Cycles = 0);
777330f729Sjoerg 
787330f729Sjoerg     bool cyclesUntilReturn(MachineBasicBlock *MBB,
797330f729Sjoerg                            unsigned int &Cycles);
807330f729Sjoerg 
817330f729Sjoerg     void addPadding(MachineBasicBlock *MBB,
827330f729Sjoerg                     MachineBasicBlock::iterator &MBBI,
837330f729Sjoerg                     unsigned int NOOPsToAdd);
847330f729Sjoerg 
857330f729Sjoerg     const unsigned int Threshold;
867330f729Sjoerg 
877330f729Sjoerg     // ReturnBBs - Maps basic blocks that return to the minimum number of
887330f729Sjoerg     // cycles until the return, starting from the entry block.
897330f729Sjoerg     DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
907330f729Sjoerg 
917330f729Sjoerg     // VisitedBBs - Cache of previously visited BBs.
927330f729Sjoerg     DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
937330f729Sjoerg 
947330f729Sjoerg     TargetSchedModel TSM;
957330f729Sjoerg   };
967330f729Sjoerg 
977330f729Sjoerg   char PadShortFunc::ID = 0;
987330f729Sjoerg }
997330f729Sjoerg 
createX86PadShortFunctions()1007330f729Sjoerg FunctionPass *llvm::createX86PadShortFunctions() {
1017330f729Sjoerg   return new PadShortFunc();
1027330f729Sjoerg }
1037330f729Sjoerg 
1047330f729Sjoerg /// runOnMachineFunction - Loop over all of the basic blocks, inserting
1057330f729Sjoerg /// NOOP instructions before early exits.
runOnMachineFunction(MachineFunction & MF)1067330f729Sjoerg bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
1077330f729Sjoerg   if (skipFunction(MF.getFunction()))
1087330f729Sjoerg     return false;
1097330f729Sjoerg 
1107330f729Sjoerg   if (MF.getFunction().hasOptSize())
1117330f729Sjoerg     return false;
1127330f729Sjoerg 
1137330f729Sjoerg   if (!MF.getSubtarget<X86Subtarget>().padShortFunctions())
1147330f729Sjoerg     return false;
1157330f729Sjoerg 
1167330f729Sjoerg   TSM.init(&MF.getSubtarget());
1177330f729Sjoerg 
118*82d56013Sjoerg   auto *PSI =
119*82d56013Sjoerg       &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
120*82d56013Sjoerg   auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
121*82d56013Sjoerg                &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
122*82d56013Sjoerg                nullptr;
123*82d56013Sjoerg 
1247330f729Sjoerg   // Search through basic blocks and mark the ones that have early returns
1257330f729Sjoerg   ReturnBBs.clear();
1267330f729Sjoerg   VisitedBBs.clear();
1277330f729Sjoerg   findReturns(&MF.front());
1287330f729Sjoerg 
1297330f729Sjoerg   bool MadeChange = false;
1307330f729Sjoerg 
1317330f729Sjoerg   // Pad the identified basic blocks with NOOPs
1327330f729Sjoerg   for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
1337330f729Sjoerg        I != ReturnBBs.end(); ++I) {
1347330f729Sjoerg     MachineBasicBlock *MBB = I->first;
1357330f729Sjoerg     unsigned Cycles = I->second;
1367330f729Sjoerg 
137*82d56013Sjoerg     // Function::hasOptSize is already checked above.
138*82d56013Sjoerg     bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
139*82d56013Sjoerg     if (OptForSize)
140*82d56013Sjoerg       continue;
141*82d56013Sjoerg 
1427330f729Sjoerg     if (Cycles < Threshold) {
1437330f729Sjoerg       // BB ends in a return. Skip over any DBG_VALUE instructions
1447330f729Sjoerg       // trailing the terminator.
1457330f729Sjoerg       assert(MBB->size() > 0 &&
1467330f729Sjoerg              "Basic block should contain at least a RET but is empty");
1477330f729Sjoerg       MachineBasicBlock::iterator ReturnLoc = --MBB->end();
1487330f729Sjoerg 
1497330f729Sjoerg       while (ReturnLoc->isDebugInstr())
1507330f729Sjoerg         --ReturnLoc;
1517330f729Sjoerg       assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
1527330f729Sjoerg              "Basic block does not end with RET");
1537330f729Sjoerg 
1547330f729Sjoerg       addPadding(MBB, ReturnLoc, Threshold - Cycles);
1557330f729Sjoerg       NumBBsPadded++;
1567330f729Sjoerg       MadeChange = true;
1577330f729Sjoerg     }
1587330f729Sjoerg   }
1597330f729Sjoerg 
1607330f729Sjoerg   return MadeChange;
1617330f729Sjoerg }
1627330f729Sjoerg 
1637330f729Sjoerg /// findReturn - Starting at MBB, follow control flow and add all
1647330f729Sjoerg /// basic blocks that contain a return to ReturnBBs.
findReturns(MachineBasicBlock * MBB,unsigned int Cycles)1657330f729Sjoerg void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
1667330f729Sjoerg   // If this BB has a return, note how many cycles it takes to get there.
1677330f729Sjoerg   bool hasReturn = cyclesUntilReturn(MBB, Cycles);
1687330f729Sjoerg   if (Cycles >= Threshold)
1697330f729Sjoerg     return;
1707330f729Sjoerg 
1717330f729Sjoerg   if (hasReturn) {
1727330f729Sjoerg     ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
1737330f729Sjoerg     return;
1747330f729Sjoerg   }
1757330f729Sjoerg 
1767330f729Sjoerg   // Follow branches in BB and look for returns
1777330f729Sjoerg   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
1787330f729Sjoerg        I != MBB->succ_end(); ++I) {
1797330f729Sjoerg     if (*I == MBB)
1807330f729Sjoerg       continue;
1817330f729Sjoerg     findReturns(*I, Cycles);
1827330f729Sjoerg   }
1837330f729Sjoerg }
1847330f729Sjoerg 
1857330f729Sjoerg /// cyclesUntilReturn - return true if the MBB has a return instruction,
1867330f729Sjoerg /// and return false otherwise.
1877330f729Sjoerg /// Cycles will be incremented by the number of cycles taken to reach the
1887330f729Sjoerg /// return or the end of the BB, whichever occurs first.
cyclesUntilReturn(MachineBasicBlock * MBB,unsigned int & Cycles)1897330f729Sjoerg bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
1907330f729Sjoerg                                      unsigned int &Cycles) {
1917330f729Sjoerg   // Return cached result if BB was previously visited
1927330f729Sjoerg   DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
1937330f729Sjoerg     = VisitedBBs.find(MBB);
1947330f729Sjoerg   if (it != VisitedBBs.end()) {
1957330f729Sjoerg     VisitedBBInfo BBInfo = it->second;
1967330f729Sjoerg     Cycles += BBInfo.Cycles;
1977330f729Sjoerg     return BBInfo.HasReturn;
1987330f729Sjoerg   }
1997330f729Sjoerg 
2007330f729Sjoerg   unsigned int CyclesToEnd = 0;
2017330f729Sjoerg 
2027330f729Sjoerg   for (MachineInstr &MI : *MBB) {
2037330f729Sjoerg     // Mark basic blocks with a return instruction. Calls to other
2047330f729Sjoerg     // functions do not count because the called function will be padded,
2057330f729Sjoerg     // if necessary.
2067330f729Sjoerg     if (MI.isReturn() && !MI.isCall()) {
2077330f729Sjoerg       VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
2087330f729Sjoerg       Cycles += CyclesToEnd;
2097330f729Sjoerg       return true;
2107330f729Sjoerg     }
2117330f729Sjoerg 
2127330f729Sjoerg     CyclesToEnd += TSM.computeInstrLatency(&MI);
2137330f729Sjoerg   }
2147330f729Sjoerg 
2157330f729Sjoerg   VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
2167330f729Sjoerg   Cycles += CyclesToEnd;
2177330f729Sjoerg   return false;
2187330f729Sjoerg }
2197330f729Sjoerg 
2207330f729Sjoerg /// addPadding - Add the given number of NOOP instructions to the function
2217330f729Sjoerg /// just prior to the return at MBBI
addPadding(MachineBasicBlock * MBB,MachineBasicBlock::iterator & MBBI,unsigned int NOOPsToAdd)2227330f729Sjoerg void PadShortFunc::addPadding(MachineBasicBlock *MBB,
2237330f729Sjoerg                               MachineBasicBlock::iterator &MBBI,
2247330f729Sjoerg                               unsigned int NOOPsToAdd) {
225*82d56013Sjoerg   const DebugLoc &DL = MBBI->getDebugLoc();
2267330f729Sjoerg   unsigned IssueWidth = TSM.getIssueWidth();
2277330f729Sjoerg 
2287330f729Sjoerg   for (unsigned i = 0, e = IssueWidth * NOOPsToAdd; i != e; ++i)
2297330f729Sjoerg     BuildMI(*MBB, MBBI, DL, TSM.getInstrInfo()->get(X86::NOOP));
2307330f729Sjoerg }
231