xref: /llvm-project/bolt/lib/Passes/SplitFunctions.cpp (revision c30ab9dcff3f692001dfe5c0d1776692c47796cb)
1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SplitFunctions pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/SplitFunctions.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Core/FunctionLayout.h"
17 #include "bolt/Core/ParallelUtilities.h"
18 #include "bolt/Utils/CommandLineOpts.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Sequence.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include <algorithm>
26 #include <iterator>
27 #include <memory>
28 #include <numeric>
29 #include <random>
30 #include <vector>
31 
32 #define DEBUG_TYPE "bolt-opts"
33 
34 using namespace llvm;
35 using namespace bolt;
36 
37 namespace {
38 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
39 public:
40   explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
41       : cl::parser<bool>(O) {}
42 
43   bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
44     if (Arg == "2" || Arg == "3") {
45       Value = true;
46       errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
47                         "for option -{1} is deprecated\n",
48                         Arg, ArgName);
49       return false;
50     }
51     return cl::parser<bool>::parse(O, ArgName, Arg, Value);
52   }
53 };
54 } // namespace
55 
56 namespace opts {
57 
58 extern cl::OptionCategory BoltOptCategory;
59 
60 extern cl::opt<bool> SplitEH;
61 extern cl::opt<unsigned> ExecutionCountThreshold;
62 extern cl::opt<uint32_t> RandomSeed;
63 
64 static cl::opt<bool> AggressiveSplitting(
65     "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
66     cl::cat(BoltOptCategory));
67 
68 static cl::opt<unsigned> SplitAlignThreshold(
69     "split-align-threshold",
70     cl::desc("when deciding to split a function, apply this alignment "
71              "while doing the size comparison (see -split-threshold). "
72              "Default value: 2."),
73     cl::init(2),
74 
75     cl::Hidden, cl::cat(BoltOptCategory));
76 
77 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
78     SplitFunctions("split-functions",
79                    cl::desc("split functions into fragments"),
80                    cl::cat(BoltOptCategory));
81 
82 static cl::opt<unsigned> SplitThreshold(
83     "split-threshold",
84     cl::desc("split function only if its main size is reduced by more than "
85              "given amount of bytes. Default value: 0, i.e. split iff the "
86              "size is reduced. Note that on some architectures the size can "
87              "increase after splitting."),
88     cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
89 
90 static cl::opt<SplitFunctionsStrategy> SplitStrategy(
91     "split-strategy", cl::init(SplitFunctionsStrategy::Profile2),
92     cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
93                           "split each function into a hot and cold fragment "
94                           "using profiling information")),
95     cl::values(clEnumValN(
96         SplitFunctionsStrategy::Random2, "random2",
97         "split each function into a hot and cold fragment at a randomly chosen "
98         "split point (ignoring any available profiling information)")),
99     cl::values(clEnumValN(
100         SplitFunctionsStrategy::RandomN, "randomN",
101         "split each function into N fragments at a randomly chosen split "
102         "points (ignoring any available profiling information)")),
103     cl::values(clEnumValN(
104         SplitFunctionsStrategy::All, "all",
105         "split all basic blocks of each function into fragments such that each "
106         "fragment contains exactly a single basic block")),
107     cl::desc("strategy used to partition blocks into fragments"),
108     cl::cat(BoltOptCategory));
109 } // namespace opts
110 
111 namespace {
112 bool hasFullProfile(const BinaryFunction &BF) {
113   return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
114     return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE;
115   });
116 }
117 
118 bool allBlocksCold(const BinaryFunction &BF) {
119   return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
120     return BB.getExecutionCount() == 0;
121   });
122 }
123 
124 struct SplitProfile2 final : public SplitStrategy {
125   bool canSplit(const BinaryFunction &BF) override {
126     return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
127   }
128 
129   bool keepEmpty() override { return false; }
130 
131   void fragment(const BlockIt Start, const BlockIt End) override {
132     for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
133       if (BB->getExecutionCount() == 0)
134         BB->setFragmentNum(FragmentNum::cold());
135     }
136   }
137 };
138 
139 struct SplitRandom2 final : public SplitStrategy {
140   std::minstd_rand0 Gen;
141 
142   SplitRandom2() : Gen(opts::RandomSeed.getValue()) {}
143 
144   bool canSplit(const BinaryFunction &BF) override { return true; }
145 
146   bool keepEmpty() override { return false; }
147 
148   void fragment(const BlockIt Start, const BlockIt End) override {
149     using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
150     const DiffT NumBlocks = End - Start;
151     assert(NumBlocks > 0 && "Cannot fragment empty function");
152 
153     // We want to split at least one block
154     const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1);
155     std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint);
156     const DiffT SplitPoint = Dist(Gen);
157     for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End))
158       BB->setFragmentNum(FragmentNum::cold());
159 
160     LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
161                                  "{1} possible) blocks to split\n",
162                                  NumBlocks - SplitPoint, End - Start));
163   }
164 };
165 
166 struct SplitRandomN final : public SplitStrategy {
167   std::minstd_rand0 Gen;
168 
169   SplitRandomN() : Gen(opts::RandomSeed.getValue()) {}
170 
171   bool canSplit(const BinaryFunction &BF) override { return true; }
172 
173   bool keepEmpty() override { return false; }
174 
175   void fragment(const BlockIt Start, const BlockIt End) override {
176     using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
177     const DiffT NumBlocks = End - Start;
178     assert(NumBlocks > 0 && "Cannot fragment empty function");
179 
180     // With n blocks, there are n-1 places to split them.
181     const DiffT MaximumSplits = NumBlocks - 1;
182     // We want to generate at least two fragment if possible, but if there is
183     // only one block, no splits are possible.
184     const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1);
185     std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits);
186     // Choose how many splits to perform
187     const DiffT NumSplits = Dist(Gen);
188 
189     // Draw split points from a lottery
190     SmallVector<unsigned, 0> Lottery(MaximumSplits);
191     // Start lottery at 1, because there is no meaningful splitpoint before the
192     // first block.
193     std::iota(Lottery.begin(), Lottery.end(), 1u);
194     std::shuffle(Lottery.begin(), Lottery.end(), Gen);
195     Lottery.resize(NumSplits);
196     llvm::sort(Lottery);
197 
198     // Add one past the end entry to lottery
199     Lottery.push_back(NumBlocks);
200 
201     unsigned LotteryIndex = 0;
202     unsigned BBPos = 0;
203     for (BinaryBasicBlock *const BB : make_range(Start, End)) {
204       // Check whether to start new fragment
205       if (BBPos >= Lottery[LotteryIndex])
206         ++LotteryIndex;
207 
208       // Because LotteryIndex is 0 based and cold fragments are 1 based, we can
209       // use the index to assign fragments.
210       BB->setFragmentNum(FragmentNum(LotteryIndex));
211 
212       ++BBPos;
213     }
214   }
215 };
216 
217 struct SplitAll final : public SplitStrategy {
218   bool canSplit(const BinaryFunction &BF) override { return true; }
219 
220   bool keepEmpty() override {
221     // Keeping empty fragments allows us to test, that empty fragments do not
222     // generate symbols.
223     return true;
224   }
225 
226   void fragment(const BlockIt Start, const BlockIt End) override {
227     unsigned Fragment = 0;
228     for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
229       BB->setFragmentNum(FragmentNum(Fragment++));
230   }
231 };
232 } // namespace
233 
234 namespace llvm {
235 namespace bolt {
236 
237 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
238   // Apply execution count threshold
239   if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
240     return false;
241 
242   return BinaryFunctionPass::shouldOptimize(BF);
243 }
244 
245 void SplitFunctions::runOnFunctions(BinaryContext &BC) {
246   if (!opts::SplitFunctions)
247     return;
248 
249   std::unique_ptr<SplitStrategy> Strategy;
250   bool ForceSequential = false;
251 
252   switch (opts::SplitStrategy) {
253   case SplitFunctionsStrategy::Profile2:
254     Strategy = std::make_unique<SplitProfile2>();
255     break;
256   case SplitFunctionsStrategy::Random2:
257     Strategy = std::make_unique<SplitRandom2>();
258     // If we split functions randomly, we need to ensure that across runs with
259     // the same input, we generate random numbers for each function in the same
260     // order.
261     ForceSequential = true;
262     break;
263   case SplitFunctionsStrategy::RandomN:
264     Strategy = std::make_unique<SplitRandomN>();
265     ForceSequential = true;
266     break;
267   case SplitFunctionsStrategy::All:
268     Strategy = std::make_unique<SplitAll>();
269     break;
270   }
271 
272   ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
273     return !shouldOptimize(BF);
274   };
275 
276   ParallelUtilities::runOnEachFunction(
277       BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR,
278       [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc,
279       "SplitFunctions", ForceSequential);
280 
281   if (SplitBytesHot + SplitBytesCold > 0)
282     outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
283            << " hot bytes from " << SplitBytesCold << " cold bytes "
284            << format("(%.2lf%% of split functions is hot).\n",
285                      100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
286 }
287 
288 void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
289   if (BF.empty())
290     return;
291 
292   if (!S.canSplit(BF))
293     return;
294 
295   FunctionLayout &Layout = BF.getLayout();
296   BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
297                                                      Layout.block_end());
298 
299   BinaryContext &BC = BF.getBinaryContext();
300   size_t OriginalHotSize;
301   size_t HotSize;
302   size_t ColdSize;
303   if (BC.isX86()) {
304     std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
305     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
306                       << " pre-split is <0x"
307                       << Twine::utohexstr(OriginalHotSize) << ", 0x"
308                       << Twine::utohexstr(ColdSize) << ">\n");
309   }
310 
311   BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
312                                                 Layout.block_end());
313   // Never outline the first basic block.
314   NewLayout.front()->setCanOutline(false);
315   for (BinaryBasicBlock *const BB : NewLayout) {
316     if (!BB->canOutline())
317       continue;
318 
319     // Do not split extra entry points in aarch64. They can be referred by
320     // using ADRs and when this happens, these blocks cannot be placed far
321     // away due to the limited range in ADR instruction.
322     if (BC.isAArch64() && BB->isEntryPoint()) {
323       BB->setCanOutline(false);
324       continue;
325     }
326 
327     if (BF.hasEHRanges() && !opts::SplitEH) {
328       // We cannot move landing pads (or rather entry points for landing pads).
329       if (BB->isLandingPad()) {
330         BB->setCanOutline(false);
331         continue;
332       }
333       // We cannot move a block that can throw since exception-handling
334       // runtime cannot deal with split functions. However, if we can guarantee
335       // that the block never throws, it is safe to move the block to
336       // decrease the size of the function.
337       for (MCInst &Instr : *BB) {
338         if (BC.MIB->isInvoke(Instr)) {
339           BB->setCanOutline(false);
340           break;
341         }
342       }
343     }
344   }
345 
346   BF.getLayout().updateLayoutIndices();
347   S.fragment(NewLayout.begin(), NewLayout.end());
348 
349   // Make sure all non-outlineable blocks are in the main-fragment.
350   for (BinaryBasicBlock *const BB : NewLayout) {
351     if (!BB->canOutline())
352       BB->setFragmentNum(FragmentNum::main());
353   }
354 
355   if (opts::AggressiveSplitting) {
356     // All blocks with 0 count that we can move go to the end of the function.
357     // Even if they were natural to cluster formation and were seen in-between
358     // hot basic blocks.
359     llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A,
360                                      const BinaryBasicBlock *const B) {
361       return A->getFragmentNum() < B->getFragmentNum();
362     });
363   } else if (BF.hasEHRanges() && !opts::SplitEH) {
364     // Typically functions with exception handling have landing pads at the end.
365     // We cannot move beginning of landing pads, but we can move 0-count blocks
366     // comprising landing pads to the end and thus facilitate splitting.
367     auto FirstLP = NewLayout.begin();
368     while ((*FirstLP)->isLandingPad())
369       ++FirstLP;
370 
371     std::stable_sort(FirstLP, NewLayout.end(),
372                      [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
373                        return A->getFragmentNum() < B->getFragmentNum();
374                      });
375   }
376 
377   // Make sure that fragments are increasing.
378   FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum();
379   for (BinaryBasicBlock *const BB : reverse(NewLayout)) {
380     if (BB->getFragmentNum() > CurrentFragment)
381       BB->setFragmentNum(CurrentFragment);
382     CurrentFragment = BB->getFragmentNum();
383   }
384 
385   if (!S.keepEmpty()) {
386     FragmentNum CurrentFragment = FragmentNum::main();
387     FragmentNum NewFragment = FragmentNum::main();
388     for (BinaryBasicBlock *const BB : NewLayout) {
389       if (BB->getFragmentNum() > CurrentFragment) {
390         CurrentFragment = BB->getFragmentNum();
391         NewFragment = FragmentNum(NewFragment.get() + 1);
392       }
393       BB->setFragmentNum(NewFragment);
394     }
395   }
396 
397   BF.getLayout().update(NewLayout);
398 
399   // For shared objects, invoke instructions and corresponding landing pads
400   // have to be placed in the same fragment. When we split them, create
401   // trampoline landing pads that will redirect the execution to real LPs.
402   TrampolineSetType Trampolines;
403   if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
404     Trampolines = createEHTrampolines(BF);
405 
406   // Check the new size to see if it's worth splitting the function.
407   if (BC.isX86() && BF.isSplit()) {
408     std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
409     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
410                       << " post-split is <0x" << Twine::utohexstr(HotSize)
411                       << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
412     if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
413         alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
414       if (opts::Verbosity >= 2) {
415         outs() << "BOLT-INFO: Reversing splitting of function "
416                << formatv("{0}:\n  {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
417                           ColdSize, OriginalHotSize);
418       }
419 
420       // Reverse the action of createEHTrampolines(). The trampolines will be
421       // placed immediately before the matching destination resulting in no
422       // extra code.
423       if (PreSplitLayout.size() != BF.size())
424         PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);
425 
426       for (BinaryBasicBlock &BB : BF)
427         BB.setFragmentNum(FragmentNum::main());
428       BF.getLayout().update(PreSplitLayout);
429     } else {
430       SplitBytesHot += HotSize;
431       SplitBytesCold += ColdSize;
432     }
433   }
434 }
435 
436 SplitFunctions::TrampolineSetType
437 SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
438   const auto &MIB = BF.getBinaryContext().MIB;
439 
440   // Map real landing pads to the corresponding trampolines.
441   TrampolineSetType LPTrampolines;
442 
443   // Iterate over the copy of basic blocks since we are adding new blocks to the
444   // function which will invalidate its iterators.
445   std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
446   for (BinaryBasicBlock *BB : Blocks) {
447     for (MCInst &Instr : *BB) {
448       const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
449       if (!EHInfo || !EHInfo->first)
450         continue;
451 
452       const MCSymbol *LPLabel = EHInfo->first;
453       BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
454       if (BB->getFragmentNum() == LPBlock->getFragmentNum())
455         continue;
456 
457       const MCSymbol *TrampolineLabel = nullptr;
458       const TrampolineKey Key(BB->getFragmentNum(), LPLabel);
459       auto Iter = LPTrampolines.find(Key);
460       if (Iter != LPTrampolines.end()) {
461         TrampolineLabel = Iter->second;
462       } else {
463         // Create a trampoline basic block in the same fragment as the thrower.
464         // Note: there's no need to insert the jump instruction, it will be
465         // added by fixBranches().
466         BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
467         TrampolineBB->setFragmentNum(BB->getFragmentNum());
468         TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
469         TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
470         TrampolineBB->setCFIState(LPBlock->getCFIState());
471         TrampolineLabel = TrampolineBB->getLabel();
472         LPTrampolines.insert(std::make_pair(Key, TrampolineLabel));
473       }
474 
475       // Substitute the landing pad with the trampoline.
476       MIB->updateEHInfo(Instr,
477                         MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
478     }
479   }
480 
481   if (LPTrampolines.empty())
482     return LPTrampolines;
483 
484   // All trampoline blocks were added to the end of the function. Place them at
485   // the end of corresponding fragments.
486   BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
487                                                 BF.getLayout().block_end());
488   stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
489     return A->getFragmentNum() < B->getFragmentNum();
490   });
491   BF.getLayout().update(NewLayout);
492 
493   // Conservatively introduce branch instructions.
494   BF.fixBranches();
495 
496   // Update exception-handling CFG for the function.
497   BF.recomputeLandingPads();
498 
499   return LPTrampolines;
500 }
501 
502 SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
503     BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
504     const SplitFunctions::TrampolineSetType &Trampolines) const {
505   DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
506       IncomingTrampolines;
507   for (const auto &Entry : Trampolines) {
508     IncomingTrampolines[Entry.getFirst().Target].emplace_back(
509         Entry.getSecond());
510   }
511 
512   BasicBlockOrderType MergedLayout;
513   for (BinaryBasicBlock *BB : Layout) {
514     auto Iter = IncomingTrampolines.find(BB->getLabel());
515     if (Iter != IncomingTrampolines.end()) {
516       for (const MCSymbol *const Trampoline : Iter->getSecond()) {
517         BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline);
518         assert(LPBlock && "Could not find matching landing pad block.");
519         MergedLayout.push_back(LPBlock);
520       }
521     }
522     MergedLayout.push_back(BB);
523   }
524 
525   return MergedLayout;
526 }
527 
528 } // namespace bolt
529 } // namespace llvm
530