xref: /llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp (revision 90ab85a1b2e72f63039fadf6669b23f52192defd)
1 //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Uses profile information to split out cold blocks.
11 //
12 // This pass splits out cold machine basic blocks from the parent function. This
13 // implementation leverages the basic block section framework. Blocks marked
14 // cold by this pass are grouped together in a separate section prefixed with
15 // ".text.unlikely.*". The linker can then group these together as a cold
16 // section. The split part of the function is a contiguous region identified by
17 // the symbol "foo.cold". Grouping all cold blocks across functions together
18 // decreases fragmentation and improves icache and itlb utilization. Note that
19 // the overall changes to the binary size are negligible; only a small number of
20 // additional jump instructions may be introduced.
21 //
22 // For the original RFC of this pass please see
23 // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/Analysis/BlockFrequencyInfo.h"
28 #include "llvm/Analysis/BranchProbabilityInfo.h"
29 #include "llvm/Analysis/EHUtils.h"
30 #include "llvm/Analysis/ProfileSummaryInfo.h"
31 #include "llvm/CodeGen/BasicBlockSectionUtils.h"
32 #include "llvm/CodeGen/MachineBasicBlock.h"
33 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
34 #include "llvm/CodeGen/MachineFunction.h"
35 #include "llvm/CodeGen/MachineFunctionPass.h"
36 #include "llvm/CodeGen/MachineModuleInfo.h"
37 #include "llvm/CodeGen/Passes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Support/CommandLine.h"
41 #include <optional>
42 
43 using namespace llvm;
44 
// FIXME: This cutoff value is CPU dependent and should be moved to
// TargetTransformInfo once we consider enabling this on other platforms.
// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
// The default was empirically determined to be optimal when considering cutoff
// values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
// Intel CPUs.
//
// The cutoff is consulted by isColdBlock() only when the profile summary
// reports an instrumentation (or context-sensitive instrumentation) profile.
// Setting it to zero disables the percentile test; isColdBlock() then falls
// back to comparing the block count against -mfs-count-threshold.
static cl::opt<unsigned>
    PercentileCutoff("mfs-psi-cutoff",
                     cl::desc("Percentile profile summary cutoff used to "
                              "determine cold blocks. Unused if set to zero."),
                     cl::init(999950), cl::Hidden);
57 
// Absolute profile-count threshold. isColdBlock() treats a block as cold when
// its profile count is strictly below this value; the comparison is used
// whenever the percentile cutoff (-mfs-psi-cutoff) is not applied. With the
// default of 1, only blocks whose count is zero fall below the threshold.
static cl::opt<unsigned> ColdCountThreshold(
    "mfs-count-threshold",
    cl::desc(
        "Minimum number of times a block must be executed to be retained."),
    cl::init(1), cl::Hidden);
63 
64 static cl::opt<bool> SplitAllEHCode(
65     "mfs-split-ehcode",
66     cl::desc("Splits all EH code and it's descendants by default."),
67     cl::init(false), cl::Hidden);
68 
69 static cl::opt<bool> AllowUnsupportedTriple(
70     "mfs-allow-unsupported-triple",
71     cl::desc(
72         "Splits functions even if the target triple isn't supported. This is "
73         "testing flag for targets that don't yet support function splitting."),
74     cl::init(false), cl::Hidden);
75 
namespace {

/// Machine function pass that tags cold basic blocks with the cold section ID
/// and reorders the function's blocks accordingly (see runOnMachineFunction).
class MachineFunctionSplitter : public MachineFunctionPass {
public:
  /// Pass identifier handed to the MachineFunctionPass constructor.
  static char ID;

  /// Constructs the pass and registers it with the global PassRegistry.
  MachineFunctionSplitter() : MachineFunctionPass(ID) {
    initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
  }

  /// Human-readable pass name.
  StringRef getPassName() const override {
    return "Machine Function Splitter Transformation";
  }

  /// Declares the analyses this pass requires.
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Performs the split on \p F; returns true if the function was modified.
  bool runOnMachineFunction(MachineFunction &F) override;
};
} // end anonymous namespace
94 
95 /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
96 /// only by EH pad as cold. This will help mark EH pads statically cold
97 /// instead of relying on profile data.
98 static void setDescendantEHBlocksCold(MachineFunction &MF) {
99   DenseSet<MachineBasicBlock *> EHBlocks;
100   computeEHOnlyBlocks(MF, EHBlocks);
101   for (auto Block : EHBlocks) {
102     Block->setSectionID(MBBSectionID::ColdSectionID);
103   }
104 }
105 
106 static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
107   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
108     return X.getSectionID().Type < Y.getSectionID().Type;
109   };
110   llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
111   llvm::avoidZeroOffsetLandingPad(MF);
112 }
113 
114 static bool isColdBlock(const MachineBasicBlock &MBB,
115                         const MachineBlockFrequencyInfo *MBFI,
116                         ProfileSummaryInfo *PSI) {
117   std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
118   // For instrumentation profiles and sample profiles, we use different ways
119   // to judge whether a block is cold and should be split.
120   if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
121     // If using instrument profile, which is deemed "accurate", no count means
122     // cold.
123     if (!Count)
124       return true;
125     if (PercentileCutoff > 0)
126       return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
127     // Fallthrough to end of function.
128   } else if (PSI->hasSampleProfile()) {
129     // For sample profile, no count means "do not judege coldness".
130     if (!Count)
131       return false;
132   }
133 
134   return (*Count < ColdCountThreshold);
135 }
136 
137 bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
138   // We target functions with profile data. Static information in the form
139   // of exception handling code may be split to cold if user passes the
140   // mfs-split-ehcode flag.
141   bool UseProfileData = MF.getFunction().hasProfileData();
142   if (!UseProfileData && !SplitAllEHCode)
143     return false;
144 
145   // TODO: We don't split functions where a section attribute has been set
146   // since the split part may not be placed in a contiguous region. It may also
147   // be more beneficial to augment the linker to ensure contiguous layout of
148   // split functions within the same section as specified by the attribute.
149   if (MF.getFunction().hasSection() ||
150       MF.getFunction().hasFnAttribute("implicit-section-name"))
151     return false;
152 
153   // We don't want to proceed further for cold functions
154   // or functions of unknown hotness. Lukewarm functions have no prefix.
155   std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
156   if (SectionPrefix &&
157       (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
158     return false;
159   }
160 
161   // Renumbering blocks here preserves the order of the blocks as
162   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
163   // blocks. Preserving the order of blocks is essential to retaining decisions
164   // made by prior passes such as MachineBlockPlacement.
165   MF.RenumberBlocks();
166   MF.setBBSectionsType(BasicBlockSection::Preset);
167 
168   MachineBlockFrequencyInfo *MBFI = nullptr;
169   ProfileSummaryInfo *PSI = nullptr;
170   if (UseProfileData) {
171     MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
172     PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
173     // If we don't have a good profile (sample profile is not deemed
174     // as a "good profile") and the function is not hot, then early
175     // return. (Because we can only trust hot functions when profile
176     // quality is not good.)
177     if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
178       // Split all EH code and it's descendant statically by default.
179       if (SplitAllEHCode)
180         setDescendantEHBlocksCold(MF);
181       finishAdjustingBasicBlocksAndLandingPads(MF);
182       return true;
183     }
184   }
185 
186   SmallVector<MachineBasicBlock *, 2> LandingPads;
187   for (auto &MBB : MF) {
188     if (MBB.isEntryBlock())
189       continue;
190 
191     if (MBB.isEHPad())
192       LandingPads.push_back(&MBB);
193     else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
194       MBB.setSectionID(MBBSectionID::ColdSectionID);
195   }
196 
197   // Split all EH code and it's descendant statically by default.
198   if (SplitAllEHCode)
199     setDescendantEHBlocksCold(MF);
200   // We only split out eh pads if all of them are cold.
201   else {
202     // Here we have UseProfileData == true.
203     bool HasHotLandingPads = false;
204     for (const MachineBasicBlock *LP : LandingPads) {
205       if (!isColdBlock(*LP, MBFI, PSI))
206         HasHotLandingPads = true;
207     }
208     if (!HasHotLandingPads) {
209       for (MachineBasicBlock *LP : LandingPads)
210         LP->setSectionID(MBBSectionID::ColdSectionID);
211     }
212   }
213 
214   finishAdjustingBasicBlocksAndLandingPads(MF);
215   return true;
216 }
217 
/// Declares the analyses this pass depends on.
///
/// MachineBlockFrequencyInfo and ProfileSummaryInfoWrapperPass are the two
/// analyses fetched through getAnalysis<>() in runOnMachineFunction.
/// NOTE(review): MachineFunctionPass::getAnalysisUsage is not chained to from
/// here, and MachineModuleInfoWrapperPass is not fetched in the visible code;
/// confirm both are intentional.
void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineModuleInfoWrapperPass>();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
223 
// Definition of the pass identifier declared in the class.
char MachineFunctionSplitter::ID = 0;
// Registers the pass under the argument name "machine-function-splitter".
// The two trailing `false` arguments are presumably the CFG-only and
// is-analysis flags of INITIALIZE_PASS -- confirm against PassSupport.h.
INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
                "Split machine functions using profile information", false,
                false)
228 
/// Factory for the pass: returns a newly allocated MachineFunctionSplitter.
/// The raw pointer is returned to the caller; ownership is presumably taken
/// by whichever pass manager the pass is added to.
MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
  return new MachineFunctionSplitter();
}
232