17330f729Sjoerg //===-------- X86PadShortFunction.cpp - pad short functions -----------===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg // This file defines the pass which will pad short functions to prevent
107330f729Sjoerg // a stall if a function returns before the return address is ready. This
117330f729Sjoerg // is needed for some Intel Atom processors.
127330f729Sjoerg //
137330f729Sjoerg //===----------------------------------------------------------------------===//
147330f729Sjoerg
157330f729Sjoerg
167330f729Sjoerg #include "X86.h"
177330f729Sjoerg #include "X86InstrInfo.h"
187330f729Sjoerg #include "X86Subtarget.h"
197330f729Sjoerg #include "llvm/ADT/Statistic.h"
20*82d56013Sjoerg #include "llvm/Analysis/ProfileSummaryInfo.h"
21*82d56013Sjoerg #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
227330f729Sjoerg #include "llvm/CodeGen/MachineFunctionPass.h"
237330f729Sjoerg #include "llvm/CodeGen/MachineInstrBuilder.h"
24*82d56013Sjoerg #include "llvm/CodeGen/MachineSizeOpts.h"
257330f729Sjoerg #include "llvm/CodeGen/Passes.h"
267330f729Sjoerg #include "llvm/CodeGen/TargetSchedule.h"
277330f729Sjoerg #include "llvm/IR/Function.h"
287330f729Sjoerg #include "llvm/Support/Debug.h"
297330f729Sjoerg #include "llvm/Support/raw_ostream.h"
307330f729Sjoerg
317330f729Sjoerg using namespace llvm;
327330f729Sjoerg
337330f729Sjoerg #define DEBUG_TYPE "x86-pad-short-functions"
347330f729Sjoerg
357330f729Sjoerg STATISTIC(NumBBsPadded, "Number of basic blocks padded");
367330f729Sjoerg
377330f729Sjoerg namespace {
387330f729Sjoerg struct VisitedBBInfo {
397330f729Sjoerg // HasReturn - Whether the BB contains a return instruction
407330f729Sjoerg bool HasReturn;
417330f729Sjoerg
427330f729Sjoerg // Cycles - Number of cycles until return if HasReturn is true, otherwise
437330f729Sjoerg // number of cycles until end of the BB
447330f729Sjoerg unsigned int Cycles;
457330f729Sjoerg
VisitedBBInfo__anon0199e2f70111::VisitedBBInfo467330f729Sjoerg VisitedBBInfo() : HasReturn(false), Cycles(0) {}
VisitedBBInfo__anon0199e2f70111::VisitedBBInfo477330f729Sjoerg VisitedBBInfo(bool HasReturn, unsigned int Cycles)
487330f729Sjoerg : HasReturn(HasReturn), Cycles(Cycles) {}
497330f729Sjoerg };
507330f729Sjoerg
517330f729Sjoerg struct PadShortFunc : public MachineFunctionPass {
527330f729Sjoerg static char ID;
PadShortFunc__anon0199e2f70111::PadShortFunc537330f729Sjoerg PadShortFunc() : MachineFunctionPass(ID)
547330f729Sjoerg , Threshold(4) {}
557330f729Sjoerg
567330f729Sjoerg bool runOnMachineFunction(MachineFunction &MF) override;
577330f729Sjoerg
getAnalysisUsage__anon0199e2f70111::PadShortFunc58*82d56013Sjoerg void getAnalysisUsage(AnalysisUsage &AU) const override {
59*82d56013Sjoerg AU.addRequired<ProfileSummaryInfoWrapperPass>();
60*82d56013Sjoerg AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
61*82d56013Sjoerg AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
62*82d56013Sjoerg MachineFunctionPass::getAnalysisUsage(AU);
63*82d56013Sjoerg }
64*82d56013Sjoerg
getRequiredProperties__anon0199e2f70111::PadShortFunc657330f729Sjoerg MachineFunctionProperties getRequiredProperties() const override {
667330f729Sjoerg return MachineFunctionProperties().set(
677330f729Sjoerg MachineFunctionProperties::Property::NoVRegs);
687330f729Sjoerg }
697330f729Sjoerg
getPassName__anon0199e2f70111::PadShortFunc707330f729Sjoerg StringRef getPassName() const override {
717330f729Sjoerg return "X86 Atom pad short functions";
727330f729Sjoerg }
737330f729Sjoerg
747330f729Sjoerg private:
757330f729Sjoerg void findReturns(MachineBasicBlock *MBB,
767330f729Sjoerg unsigned int Cycles = 0);
777330f729Sjoerg
787330f729Sjoerg bool cyclesUntilReturn(MachineBasicBlock *MBB,
797330f729Sjoerg unsigned int &Cycles);
807330f729Sjoerg
817330f729Sjoerg void addPadding(MachineBasicBlock *MBB,
827330f729Sjoerg MachineBasicBlock::iterator &MBBI,
837330f729Sjoerg unsigned int NOOPsToAdd);
847330f729Sjoerg
857330f729Sjoerg const unsigned int Threshold;
867330f729Sjoerg
877330f729Sjoerg // ReturnBBs - Maps basic blocks that return to the minimum number of
887330f729Sjoerg // cycles until the return, starting from the entry block.
897330f729Sjoerg DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
907330f729Sjoerg
917330f729Sjoerg // VisitedBBs - Cache of previously visited BBs.
927330f729Sjoerg DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
937330f729Sjoerg
947330f729Sjoerg TargetSchedModel TSM;
957330f729Sjoerg };
967330f729Sjoerg
977330f729Sjoerg char PadShortFunc::ID = 0;
987330f729Sjoerg }
997330f729Sjoerg
createX86PadShortFunctions()1007330f729Sjoerg FunctionPass *llvm::createX86PadShortFunctions() {
1017330f729Sjoerg return new PadShortFunc();
1027330f729Sjoerg }
1037330f729Sjoerg
1047330f729Sjoerg /// runOnMachineFunction - Loop over all of the basic blocks, inserting
1057330f729Sjoerg /// NOOP instructions before early exits.
runOnMachineFunction(MachineFunction & MF)1067330f729Sjoerg bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
1077330f729Sjoerg if (skipFunction(MF.getFunction()))
1087330f729Sjoerg return false;
1097330f729Sjoerg
1107330f729Sjoerg if (MF.getFunction().hasOptSize())
1117330f729Sjoerg return false;
1127330f729Sjoerg
1137330f729Sjoerg if (!MF.getSubtarget<X86Subtarget>().padShortFunctions())
1147330f729Sjoerg return false;
1157330f729Sjoerg
1167330f729Sjoerg TSM.init(&MF.getSubtarget());
1177330f729Sjoerg
118*82d56013Sjoerg auto *PSI =
119*82d56013Sjoerg &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
120*82d56013Sjoerg auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
121*82d56013Sjoerg &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
122*82d56013Sjoerg nullptr;
123*82d56013Sjoerg
1247330f729Sjoerg // Search through basic blocks and mark the ones that have early returns
1257330f729Sjoerg ReturnBBs.clear();
1267330f729Sjoerg VisitedBBs.clear();
1277330f729Sjoerg findReturns(&MF.front());
1287330f729Sjoerg
1297330f729Sjoerg bool MadeChange = false;
1307330f729Sjoerg
1317330f729Sjoerg // Pad the identified basic blocks with NOOPs
1327330f729Sjoerg for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
1337330f729Sjoerg I != ReturnBBs.end(); ++I) {
1347330f729Sjoerg MachineBasicBlock *MBB = I->first;
1357330f729Sjoerg unsigned Cycles = I->second;
1367330f729Sjoerg
137*82d56013Sjoerg // Function::hasOptSize is already checked above.
138*82d56013Sjoerg bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
139*82d56013Sjoerg if (OptForSize)
140*82d56013Sjoerg continue;
141*82d56013Sjoerg
1427330f729Sjoerg if (Cycles < Threshold) {
1437330f729Sjoerg // BB ends in a return. Skip over any DBG_VALUE instructions
1447330f729Sjoerg // trailing the terminator.
1457330f729Sjoerg assert(MBB->size() > 0 &&
1467330f729Sjoerg "Basic block should contain at least a RET but is empty");
1477330f729Sjoerg MachineBasicBlock::iterator ReturnLoc = --MBB->end();
1487330f729Sjoerg
1497330f729Sjoerg while (ReturnLoc->isDebugInstr())
1507330f729Sjoerg --ReturnLoc;
1517330f729Sjoerg assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
1527330f729Sjoerg "Basic block does not end with RET");
1537330f729Sjoerg
1547330f729Sjoerg addPadding(MBB, ReturnLoc, Threshold - Cycles);
1557330f729Sjoerg NumBBsPadded++;
1567330f729Sjoerg MadeChange = true;
1577330f729Sjoerg }
1587330f729Sjoerg }
1597330f729Sjoerg
1607330f729Sjoerg return MadeChange;
1617330f729Sjoerg }
1627330f729Sjoerg
1637330f729Sjoerg /// findReturn - Starting at MBB, follow control flow and add all
1647330f729Sjoerg /// basic blocks that contain a return to ReturnBBs.
findReturns(MachineBasicBlock * MBB,unsigned int Cycles)1657330f729Sjoerg void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
1667330f729Sjoerg // If this BB has a return, note how many cycles it takes to get there.
1677330f729Sjoerg bool hasReturn = cyclesUntilReturn(MBB, Cycles);
1687330f729Sjoerg if (Cycles >= Threshold)
1697330f729Sjoerg return;
1707330f729Sjoerg
1717330f729Sjoerg if (hasReturn) {
1727330f729Sjoerg ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
1737330f729Sjoerg return;
1747330f729Sjoerg }
1757330f729Sjoerg
1767330f729Sjoerg // Follow branches in BB and look for returns
1777330f729Sjoerg for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
1787330f729Sjoerg I != MBB->succ_end(); ++I) {
1797330f729Sjoerg if (*I == MBB)
1807330f729Sjoerg continue;
1817330f729Sjoerg findReturns(*I, Cycles);
1827330f729Sjoerg }
1837330f729Sjoerg }
1847330f729Sjoerg
1857330f729Sjoerg /// cyclesUntilReturn - return true if the MBB has a return instruction,
1867330f729Sjoerg /// and return false otherwise.
1877330f729Sjoerg /// Cycles will be incremented by the number of cycles taken to reach the
1887330f729Sjoerg /// return or the end of the BB, whichever occurs first.
cyclesUntilReturn(MachineBasicBlock * MBB,unsigned int & Cycles)1897330f729Sjoerg bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
1907330f729Sjoerg unsigned int &Cycles) {
1917330f729Sjoerg // Return cached result if BB was previously visited
1927330f729Sjoerg DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
1937330f729Sjoerg = VisitedBBs.find(MBB);
1947330f729Sjoerg if (it != VisitedBBs.end()) {
1957330f729Sjoerg VisitedBBInfo BBInfo = it->second;
1967330f729Sjoerg Cycles += BBInfo.Cycles;
1977330f729Sjoerg return BBInfo.HasReturn;
1987330f729Sjoerg }
1997330f729Sjoerg
2007330f729Sjoerg unsigned int CyclesToEnd = 0;
2017330f729Sjoerg
2027330f729Sjoerg for (MachineInstr &MI : *MBB) {
2037330f729Sjoerg // Mark basic blocks with a return instruction. Calls to other
2047330f729Sjoerg // functions do not count because the called function will be padded,
2057330f729Sjoerg // if necessary.
2067330f729Sjoerg if (MI.isReturn() && !MI.isCall()) {
2077330f729Sjoerg VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
2087330f729Sjoerg Cycles += CyclesToEnd;
2097330f729Sjoerg return true;
2107330f729Sjoerg }
2117330f729Sjoerg
2127330f729Sjoerg CyclesToEnd += TSM.computeInstrLatency(&MI);
2137330f729Sjoerg }
2147330f729Sjoerg
2157330f729Sjoerg VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
2167330f729Sjoerg Cycles += CyclesToEnd;
2177330f729Sjoerg return false;
2187330f729Sjoerg }
2197330f729Sjoerg
2207330f729Sjoerg /// addPadding - Add the given number of NOOP instructions to the function
2217330f729Sjoerg /// just prior to the return at MBBI
addPadding(MachineBasicBlock * MBB,MachineBasicBlock::iterator & MBBI,unsigned int NOOPsToAdd)2227330f729Sjoerg void PadShortFunc::addPadding(MachineBasicBlock *MBB,
2237330f729Sjoerg MachineBasicBlock::iterator &MBBI,
2247330f729Sjoerg unsigned int NOOPsToAdd) {
225*82d56013Sjoerg const DebugLoc &DL = MBBI->getDebugLoc();
2267330f729Sjoerg unsigned IssueWidth = TSM.getIssueWidth();
2277330f729Sjoerg
2287330f729Sjoerg for (unsigned i = 0, e = IssueWidth * NOOPsToAdd; i != e; ++i)
2297330f729Sjoerg BuildMI(*MBB, MBBI, DL, TSM.getInstrInfo()->get(X86::NOOP));
2307330f729Sjoerg }
231