xref: /llvm-project/bolt/lib/Passes/SplitFunctions.cpp (revision 2563fd63c6f774e2b723d3f5246629e60da01eaa)
1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SplitFunctions pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/SplitFunctions.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Core/FunctionLayout.h"
17 #include "bolt/Core/ParallelUtilities.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Sequence.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/iterator_range.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/FormatVariadic.h"
24 #include <algorithm>
25 #include <iterator>
26 #include <memory>
27 #include <numeric>
28 #include <random>
29 #include <vector>
30 
31 #define DEBUG_TYPE "bolt-opts"
32 
33 using namespace llvm;
34 using namespace bolt;
35 
36 namespace {
37 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
38 public:
39   explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
40       : cl::parser<bool>(O) {}
41 
42   bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
43     if (Arg == "2" || Arg == "3") {
44       Value = true;
45       errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
46                         "for option -{1} is deprecated\n",
47                         Arg, ArgName);
48       return false;
49     }
50     return cl::parser<bool>::parse(O, ArgName, Arg, Value);
51   }
52 };
53 } // namespace
54 
55 namespace opts {
56 
57 extern cl::OptionCategory BoltOptCategory;
58 
59 extern cl::opt<bool> SplitEH;
60 extern cl::opt<unsigned> ExecutionCountThreshold;
61 extern cl::opt<uint32_t> RandomSeed;
62 
63 static cl::opt<bool> AggressiveSplitting(
64     "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
65     cl::cat(BoltOptCategory));
66 
67 static cl::opt<unsigned> SplitAlignThreshold(
68     "split-align-threshold",
69     cl::desc("when deciding to split a function, apply this alignment "
70              "while doing the size comparison (see -split-threshold). "
71              "Default value: 2."),
72     cl::init(2),
73 
74     cl::Hidden, cl::cat(BoltOptCategory));
75 
76 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
77     SplitFunctions("split-functions",
78                    cl::desc("split functions into fragments"),
79                    cl::cat(BoltOptCategory));
80 
81 static cl::opt<unsigned> SplitThreshold(
82     "split-threshold",
83     cl::desc("split function only if its main size is reduced by more than "
84              "given amount of bytes. Default value: 0, i.e. split iff the "
85              "size is reduced. Note that on some architectures the size can "
86              "increase after splitting."),
87     cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
88 
89 static cl::opt<SplitFunctionsStrategy> SplitStrategy(
90     "split-strategy", cl::init(SplitFunctionsStrategy::Profile2),
91     cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
92                           "split each function into a hot and cold fragment "
93                           "using profiling information")),
94     cl::values(clEnumValN(
95         SplitFunctionsStrategy::Random2, "random2",
96         "split each function into a hot and cold fragment at a randomly chosen "
97         "split point (ignoring any available profiling information)")),
98     cl::values(clEnumValN(
99         SplitFunctionsStrategy::RandomN, "randomN",
100         "split each function into N fragments at a randomly chosen split "
101         "points (ignoring any available profiling information)")),
102     cl::values(clEnumValN(
103         SplitFunctionsStrategy::All, "all",
104         "split all basic blocks of each function into fragments such that each "
105         "fragment contains exactly a single basic block")),
106     cl::desc("strategy used to partition blocks into fragments"),
107     cl::cat(BoltOptCategory));
108 } // namespace opts
109 
110 namespace {
111 bool hasFullProfile(const BinaryFunction &BF) {
112   return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
113     return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE;
114   });
115 }
116 
117 bool allBlocksCold(const BinaryFunction &BF) {
118   return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
119     return BB.getExecutionCount() == 0;
120   });
121 }
122 
123 struct SplitProfile2 final : public SplitStrategy {
124   bool canSplit(const BinaryFunction &BF) override {
125     return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
126   }
127 
128   bool keepEmpty() override { return false; }
129 
130   void fragment(const BlockIt Start, const BlockIt End) override {
131     for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
132       if (BB->getExecutionCount() == 0)
133         BB->setFragmentNum(FragmentNum::cold());
134     }
135   }
136 };
137 
138 struct SplitRandom2 final : public SplitStrategy {
139   std::minstd_rand0 Gen;
140 
141   SplitRandom2() : Gen(opts::RandomSeed.getValue()) {}
142 
143   bool canSplit(const BinaryFunction &BF) override { return true; }
144 
145   bool keepEmpty() override { return false; }
146 
147   void fragment(const BlockIt Start, const BlockIt End) override {
148     using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
149     const DiffT NumBlocks = End - Start;
150     assert(NumBlocks > 0 && "Cannot fragment empty function");
151 
152     // We want to split at least one block
153     const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1);
154     std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint);
155     const DiffT SplitPoint = Dist(Gen);
156     for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End))
157       BB->setFragmentNum(FragmentNum::cold());
158 
159     LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
160                                  "{1} possible) blocks to split\n",
161                                  NumBlocks - SplitPoint, End - Start));
162   }
163 };
164 
165 struct SplitRandomN final : public SplitStrategy {
166   std::minstd_rand0 Gen;
167 
168   SplitRandomN() : Gen(opts::RandomSeed.getValue()) {}
169 
170   bool canSplit(const BinaryFunction &BF) override { return true; }
171 
172   bool keepEmpty() override { return false; }
173 
174   void fragment(const BlockIt Start, const BlockIt End) override {
175     using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
176     const DiffT NumBlocks = End - Start;
177     assert(NumBlocks > 0 && "Cannot fragment empty function");
178 
179     // With n blocks, there are n-1 places to split them.
180     const DiffT MaximumSplits = NumBlocks - 1;
181     // We want to generate at least two fragment if possible, but if there is
182     // only one block, no splits are possible.
183     const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1);
184     std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits);
185     // Choose how many splits to perform
186     const DiffT NumSplits = Dist(Gen);
187 
188     // Draw split points from a lottery
189     SmallVector<unsigned, 0> Lottery(MaximumSplits);
190     // Start lottery at 1, because there is no meaningful splitpoint before the
191     // first block.
192     std::iota(Lottery.begin(), Lottery.end(), 1u);
193     std::shuffle(Lottery.begin(), Lottery.end(), Gen);
194     Lottery.resize(NumSplits);
195     llvm::sort(Lottery);
196 
197     // Add one past the end entry to lottery
198     Lottery.push_back(NumBlocks);
199 
200     unsigned LotteryIndex = 0;
201     unsigned BBPos = 0;
202     for (BinaryBasicBlock *const BB : make_range(Start, End)) {
203       // Check whether to start new fragment
204       if (BBPos >= Lottery[LotteryIndex])
205         ++LotteryIndex;
206 
207       // Because LotteryIndex is 0 based and cold fragments are 1 based, we can
208       // use the index to assign fragments.
209       BB->setFragmentNum(FragmentNum(LotteryIndex));
210 
211       ++BBPos;
212     }
213   }
214 };
215 
216 struct SplitAll final : public SplitStrategy {
217   bool canSplit(const BinaryFunction &BF) override { return true; }
218 
219   bool keepEmpty() override {
220     // Keeping empty fragments allows us to test, that empty fragments do not
221     // generate symbols.
222     return true;
223   }
224 
225   void fragment(const BlockIt Start, const BlockIt End) override {
226     unsigned Fragment = 0;
227     for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
228       BB->setFragmentNum(FragmentNum(Fragment++));
229   }
230 };
231 } // namespace
232 
233 namespace llvm {
234 namespace bolt {
235 
236 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
237   // Apply execution count threshold
238   if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
239     return false;
240 
241   return BinaryFunctionPass::shouldOptimize(BF);
242 }
243 
244 void SplitFunctions::runOnFunctions(BinaryContext &BC) {
245   if (!opts::SplitFunctions)
246     return;
247 
248   std::unique_ptr<SplitStrategy> Strategy;
249   bool ForceSequential = false;
250 
251   switch (opts::SplitStrategy) {
252   case SplitFunctionsStrategy::Profile2:
253     Strategy = std::make_unique<SplitProfile2>();
254     break;
255   case SplitFunctionsStrategy::Random2:
256     Strategy = std::make_unique<SplitRandom2>();
257     // If we split functions randomly, we need to ensure that across runs with
258     // the same input, we generate random numbers for each function in the same
259     // order.
260     ForceSequential = true;
261     break;
262   case SplitFunctionsStrategy::RandomN:
263     Strategy = std::make_unique<SplitRandomN>();
264     ForceSequential = true;
265     break;
266   case SplitFunctionsStrategy::All:
267     Strategy = std::make_unique<SplitAll>();
268     break;
269   }
270 
271   ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
272     return !shouldOptimize(BF);
273   };
274 
275   ParallelUtilities::runOnEachFunction(
276       BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR,
277       [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc,
278       "SplitFunctions", ForceSequential);
279 
280   if (SplitBytesHot + SplitBytesCold > 0)
281     outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
282            << " hot bytes from " << SplitBytesCold << " cold bytes "
283            << format("(%.2lf%% of split functions is hot).\n",
284                      100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
285 }
286 
287 void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
288   if (BF.empty())
289     return;
290 
291   if (!S.canSplit(BF))
292     return;
293 
294   FunctionLayout &Layout = BF.getLayout();
295   BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
296                                                      Layout.block_end());
297 
298   BinaryContext &BC = BF.getBinaryContext();
299   size_t OriginalHotSize;
300   size_t HotSize;
301   size_t ColdSize;
302   if (BC.isX86()) {
303     std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
304     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
305                       << " pre-split is <0x"
306                       << Twine::utohexstr(OriginalHotSize) << ", 0x"
307                       << Twine::utohexstr(ColdSize) << ">\n");
308   }
309 
310   BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
311                                                 Layout.block_end());
312   // Never outline the first basic block.
313   NewLayout.front()->setCanOutline(false);
314   for (BinaryBasicBlock *const BB : NewLayout) {
315     if (!BB->canOutline())
316       continue;
317 
318     // Do not split extra entry points in aarch64. They can be referred by
319     // using ADRs and when this happens, these blocks cannot be placed far
320     // away due to the limited range in ADR instruction.
321     if (BC.isAArch64() && BB->isEntryPoint()) {
322       BB->setCanOutline(false);
323       continue;
324     }
325 
326     if (BF.hasEHRanges() && !opts::SplitEH) {
327       // We cannot move landing pads (or rather entry points for landing pads).
328       if (BB->isLandingPad()) {
329         BB->setCanOutline(false);
330         continue;
331       }
332       // We cannot move a block that can throw since exception-handling
333       // runtime cannot deal with split functions. However, if we can guarantee
334       // that the block never throws, it is safe to move the block to
335       // decrease the size of the function.
336       for (MCInst &Instr : *BB) {
337         if (BC.MIB->isInvoke(Instr)) {
338           BB->setCanOutline(false);
339           break;
340         }
341       }
342     }
343   }
344 
345   BF.getLayout().updateLayoutIndices();
346   S.fragment(NewLayout.begin(), NewLayout.end());
347 
348   // Make sure all non-outlineable blocks are in the main-fragment.
349   for (BinaryBasicBlock *const BB : NewLayout) {
350     if (!BB->canOutline())
351       BB->setFragmentNum(FragmentNum::main());
352   }
353 
354   if (opts::AggressiveSplitting) {
355     // All blocks with 0 count that we can move go to the end of the function.
356     // Even if they were natural to cluster formation and were seen in-between
357     // hot basic blocks.
358     llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A,
359                                      const BinaryBasicBlock *const B) {
360       return A->getFragmentNum() < B->getFragmentNum();
361     });
362   } else if (BF.hasEHRanges() && !opts::SplitEH) {
363     // Typically functions with exception handling have landing pads at the end.
364     // We cannot move beginning of landing pads, but we can move 0-count blocks
365     // comprising landing pads to the end and thus facilitate splitting.
366     auto FirstLP = NewLayout.begin();
367     while ((*FirstLP)->isLandingPad())
368       ++FirstLP;
369 
370     std::stable_sort(FirstLP, NewLayout.end(),
371                      [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
372                        return A->getFragmentNum() < B->getFragmentNum();
373                      });
374   }
375 
376   // Make sure that fragments are increasing.
377   FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum();
378   for (BinaryBasicBlock *const BB : reverse(NewLayout)) {
379     if (BB->getFragmentNum() > CurrentFragment)
380       BB->setFragmentNum(CurrentFragment);
381     CurrentFragment = BB->getFragmentNum();
382   }
383 
384   if (!S.keepEmpty()) {
385     FragmentNum CurrentFragment = FragmentNum::main();
386     FragmentNum NewFragment = FragmentNum::main();
387     for (BinaryBasicBlock *const BB : NewLayout) {
388       if (BB->getFragmentNum() > CurrentFragment) {
389         CurrentFragment = BB->getFragmentNum();
390         NewFragment = FragmentNum(NewFragment.get() + 1);
391       }
392       BB->setFragmentNum(NewFragment);
393     }
394   }
395 
396   BF.getLayout().update(NewLayout);
397 
398   // For shared objects, invoke instructions and corresponding landing pads
399   // have to be placed in the same fragment. When we split them, create
400   // trampoline landing pads that will redirect the execution to real LPs.
401   TrampolineSetType Trampolines;
402   if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
403     Trampolines = createEHTrampolines(BF);
404 
405   // Check the new size to see if it's worth splitting the function.
406   if (BC.isX86() && BF.isSplit()) {
407     std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
408     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
409                       << " post-split is <0x" << Twine::utohexstr(HotSize)
410                       << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
411     if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
412         alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
413       LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n  0x"
414                         << Twine::utohexstr(HotSize) << ", 0x"
415                         << Twine::utohexstr(ColdSize) << " -> 0x"
416                         << Twine::utohexstr(OriginalHotSize) << '\n');
417 
418       // Reverse the action of createEHTrampolines(). The trampolines will be
419       // placed immediately before the matching destination resulting in no
420       // extra code.
421       if (PreSplitLayout.size() != BF.size())
422         PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);
423 
424       for (BinaryBasicBlock &BB : BF)
425         BB.setFragmentNum(FragmentNum::main());
426       BF.getLayout().update(PreSplitLayout);
427     } else {
428       SplitBytesHot += HotSize;
429       SplitBytesCold += ColdSize;
430     }
431   }
432 }
433 
434 SplitFunctions::TrampolineSetType
435 SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
436   const auto &MIB = BF.getBinaryContext().MIB;
437 
438   // Map real landing pads to the corresponding trampolines.
439   TrampolineSetType LPTrampolines;
440 
441   // Iterate over the copy of basic blocks since we are adding new blocks to the
442   // function which will invalidate its iterators.
443   std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
444   for (BinaryBasicBlock *BB : Blocks) {
445     for (MCInst &Instr : *BB) {
446       const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
447       if (!EHInfo || !EHInfo->first)
448         continue;
449 
450       const MCSymbol *LPLabel = EHInfo->first;
451       BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
452       if (BB->getFragmentNum() == LPBlock->getFragmentNum())
453         continue;
454 
455       const MCSymbol *TrampolineLabel = nullptr;
456       const TrampolineKey Key(BB->getFragmentNum(), LPLabel);
457       auto Iter = LPTrampolines.find(Key);
458       if (Iter != LPTrampolines.end()) {
459         TrampolineLabel = Iter->second;
460       } else {
461         // Create a trampoline basic block in the same fragment as the thrower.
462         // Note: there's no need to insert the jump instruction, it will be
463         // added by fixBranches().
464         BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
465         TrampolineBB->setFragmentNum(BB->getFragmentNum());
466         TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
467         TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
468         TrampolineBB->setCFIState(LPBlock->getCFIState());
469         TrampolineLabel = TrampolineBB->getLabel();
470         LPTrampolines.insert(std::make_pair(Key, TrampolineLabel));
471       }
472 
473       // Substitute the landing pad with the trampoline.
474       MIB->updateEHInfo(Instr,
475                         MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
476     }
477   }
478 
479   if (LPTrampolines.empty())
480     return LPTrampolines;
481 
482   // All trampoline blocks were added to the end of the function. Place them at
483   // the end of corresponding fragments.
484   BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
485                                                 BF.getLayout().block_end());
486   stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
487     return A->getFragmentNum() < B->getFragmentNum();
488   });
489   BF.getLayout().update(NewLayout);
490 
491   // Conservatively introduce branch instructions.
492   BF.fixBranches();
493 
494   // Update exception-handling CFG for the function.
495   BF.recomputeLandingPads();
496 
497   return LPTrampolines;
498 }
499 
500 SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
501     BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
502     const SplitFunctions::TrampolineSetType &Trampolines) const {
503   DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
504       IncomingTrampolines;
505   for (const auto &Entry : Trampolines) {
506     IncomingTrampolines[Entry.getFirst().Target].emplace_back(
507         Entry.getSecond());
508   }
509 
510   BasicBlockOrderType MergedLayout;
511   for (BinaryBasicBlock *BB : Layout) {
512     auto Iter = IncomingTrampolines.find(BB->getLabel());
513     if (Iter != IncomingTrampolines.end()) {
514       for (const MCSymbol *const Trampoline : Iter->getSecond()) {
515         BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline);
516         assert(LPBlock && "Could not find matching landing pad block.");
517         MergedLayout.push_back(LPBlock);
518       }
519     }
520     MergedLayout.push_back(BB);
521   }
522 
523   return MergedLayout;
524 }
525 
526 } // namespace bolt
527 } // namespace llvm
528