1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SplitFunctions pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/SplitFunctions.h" 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Core/FunctionLayout.h" 17 #include "bolt/Core/ParallelUtilities.h" 18 #include "bolt/Utils/CommandLineOpts.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/Sequence.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/ADT/iterator_range.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/FormatVariadic.h" 25 #include <algorithm> 26 #include <iterator> 27 #include <memory> 28 #include <numeric> 29 #include <random> 30 #include <vector> 31 32 #define DEBUG_TYPE "bolt-opts" 33 34 using namespace llvm; 35 using namespace bolt; 36 37 namespace { 38 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> { 39 public: 40 explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) 41 : cl::parser<bool>(O) {} 42 43 bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { 44 if (Arg == "2" || Arg == "3") { 45 Value = true; 46 errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " 47 "for option -{1} is deprecated\n", 48 Arg, ArgName); 49 return false; 50 } 51 return cl::parser<bool>::parse(O, ArgName, Arg, Value); 52 } 53 }; 54 } // namespace 55 56 namespace opts { 57 58 extern cl::OptionCategory BoltOptCategory; 59 60 extern cl::opt<bool> SplitEH; 61 extern cl::opt<unsigned> ExecutionCountThreshold; 62 extern cl::opt<uint32_t> RandomSeed; 63 64 static cl::opt<bool> AggressiveSplitting( 65 "split-all-cold", cl::desc("outline as many cold basic blocks as possible"), 66 cl::cat(BoltOptCategory)); 67 68 static cl::opt<unsigned> SplitAlignThreshold( 69 "split-align-threshold", 70 cl::desc("when deciding to split a function, apply this alignment " 71 "while doing the size comparison (see -split-threshold). " 72 "Default value: 2."), 73 cl::init(2), 74 75 cl::Hidden, cl::cat(BoltOptCategory)); 76 77 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser> 78 SplitFunctions("split-functions", 79 cl::desc("split functions into fragments"), 80 cl::cat(BoltOptCategory)); 81 82 static cl::opt<unsigned> SplitThreshold( 83 "split-threshold", 84 cl::desc("split function only if its main size is reduced by more than " 85 "given amount of bytes. Default value: 0, i.e. split iff the " 86 "size is reduced. Note that on some architectures the size can " 87 "increase after splitting."), 88 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); 89 90 static cl::opt<SplitFunctionsStrategy> SplitStrategy( 91 "split-strategy", cl::init(SplitFunctionsStrategy::Profile2), 92 cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2", 93 "split each function into a hot and cold fragment " 94 "using profiling information")), 95 cl::values(clEnumValN( 96 SplitFunctionsStrategy::Random2, "random2", 97 "split each function into a hot and cold fragment at a randomly chosen " 98 "split point (ignoring any available profiling information)")), 99 cl::values(clEnumValN( 100 SplitFunctionsStrategy::RandomN, "randomN", 101 "split each function into N fragments at a randomly chosen split " 102 "points (ignoring any available profiling information)")), 103 cl::values(clEnumValN( 104 SplitFunctionsStrategy::All, "all", 105 "split all basic blocks of each function into fragments such that each " 106 "fragment contains exactly a single basic block")), 107 cl::desc("strategy used to partition blocks into fragments"), 108 cl::cat(BoltOptCategory)); 109 } // namespace opts 110 111 namespace { 112 bool hasFullProfile(const BinaryFunction &BF) { 113 return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) { 114 return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE; 115 }); 116 } 117 118 bool allBlocksCold(const BinaryFunction &BF) { 119 return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) { 120 return BB.getExecutionCount() == 0; 121 }); 122 } 123 124 struct SplitProfile2 final : public SplitStrategy { 125 bool canSplit(const BinaryFunction &BF) override { 126 return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF); 127 } 128 129 bool keepEmpty() override { return false; } 130 131 void fragment(const BlockIt Start, const BlockIt End) override { 132 for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) { 133 if (BB->getExecutionCount() == 0) 134 BB->setFragmentNum(FragmentNum::cold()); 135 } 136 } 137 }; 138 139 struct SplitRandom2 final : public SplitStrategy { 140 std::minstd_rand0 Gen; 141 142 SplitRandom2() : Gen(opts::RandomSeed.getValue()) {} 143 144 bool canSplit(const BinaryFunction &BF) override { return true; } 145 146 bool keepEmpty() override { return false; } 147 148 void fragment(const BlockIt Start, const BlockIt End) override { 149 using DiffT = typename std::iterator_traits<BlockIt>::difference_type; 150 const DiffT NumBlocks = End - Start; 151 assert(NumBlocks > 0 && "Cannot fragment empty function"); 152 153 // We want to split at least one block 154 const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1); 155 std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint); 156 const DiffT SplitPoint = Dist(Gen); 157 for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End)) 158 BB->setFragmentNum(FragmentNum::cold()); 159 160 LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of " 161 "{1} possible) blocks to split\n", 162 NumBlocks - SplitPoint, End - Start)); 163 } 164 }; 165 166 struct SplitRandomN final : public SplitStrategy { 167 std::minstd_rand0 Gen; 168 169 SplitRandomN() : Gen(opts::RandomSeed.getValue()) {} 170 171 bool canSplit(const BinaryFunction &BF) override { return true; } 172 173 bool keepEmpty() override { return false; } 174 175 void fragment(const BlockIt Start, const BlockIt End) override { 176 using DiffT = typename std::iterator_traits<BlockIt>::difference_type; 177 const DiffT NumBlocks = End - Start; 178 assert(NumBlocks > 0 && "Cannot fragment empty function"); 179 180 // With n blocks, there are n-1 places to split them. 181 const DiffT MaximumSplits = NumBlocks - 1; 182 // We want to generate at least two fragment if possible, but if there is 183 // only one block, no splits are possible. 184 const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1); 185 std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits); 186 // Choose how many splits to perform 187 const DiffT NumSplits = Dist(Gen); 188 189 // Draw split points from a lottery 190 SmallVector<unsigned, 0> Lottery(MaximumSplits); 191 // Start lottery at 1, because there is no meaningful splitpoint before the 192 // first block. 193 std::iota(Lottery.begin(), Lottery.end(), 1u); 194 std::shuffle(Lottery.begin(), Lottery.end(), Gen); 195 Lottery.resize(NumSplits); 196 llvm::sort(Lottery); 197 198 // Add one past the end entry to lottery 199 Lottery.push_back(NumBlocks); 200 201 unsigned LotteryIndex = 0; 202 unsigned BBPos = 0; 203 for (BinaryBasicBlock *const BB : make_range(Start, End)) { 204 // Check whether to start new fragment 205 if (BBPos >= Lottery[LotteryIndex]) 206 ++LotteryIndex; 207 208 // Because LotteryIndex is 0 based and cold fragments are 1 based, we can 209 // use the index to assign fragments. 210 BB->setFragmentNum(FragmentNum(LotteryIndex)); 211 212 ++BBPos; 213 } 214 } 215 }; 216 217 struct SplitAll final : public SplitStrategy { 218 bool canSplit(const BinaryFunction &BF) override { return true; } 219 220 bool keepEmpty() override { 221 // Keeping empty fragments allows us to test, that empty fragments do not 222 // generate symbols. 223 return true; 224 } 225 226 void fragment(const BlockIt Start, const BlockIt End) override { 227 unsigned Fragment = 0; 228 for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) 229 BB->setFragmentNum(FragmentNum(Fragment++)); 230 } 231 }; 232 } // namespace 233 234 namespace llvm { 235 namespace bolt { 236 237 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const { 238 // Apply execution count threshold 239 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) 240 return false; 241 242 return BinaryFunctionPass::shouldOptimize(BF); 243 } 244 245 void SplitFunctions::runOnFunctions(BinaryContext &BC) { 246 if (!opts::SplitFunctions) 247 return; 248 249 std::unique_ptr<SplitStrategy> Strategy; 250 bool ForceSequential = false; 251 252 switch (opts::SplitStrategy) { 253 case SplitFunctionsStrategy::Profile2: 254 Strategy = std::make_unique<SplitProfile2>(); 255 break; 256 case SplitFunctionsStrategy::Random2: 257 Strategy = std::make_unique<SplitRandom2>(); 258 // If we split functions randomly, we need to ensure that across runs with 259 // the same input, we generate random numbers for each function in the same 260 // order. 261 ForceSequential = true; 262 break; 263 case SplitFunctionsStrategy::RandomN: 264 Strategy = std::make_unique<SplitRandomN>(); 265 ForceSequential = true; 266 break; 267 case SplitFunctionsStrategy::All: 268 Strategy = std::make_unique<SplitAll>(); 269 break; 270 } 271 272 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 273 return !shouldOptimize(BF); 274 }; 275 276 ParallelUtilities::runOnEachFunction( 277 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, 278 [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc, 279 "SplitFunctions", ForceSequential); 280 281 if (SplitBytesHot + SplitBytesCold > 0) 282 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot 283 << " hot bytes from " << SplitBytesCold << " cold bytes " 284 << format("(%.2lf%% of split functions is hot).\n", 285 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); 286 } 287 288 void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { 289 if (BF.empty()) 290 return; 291 292 if (!S.canSplit(BF)) 293 return; 294 295 FunctionLayout &Layout = BF.getLayout(); 296 BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(), 297 Layout.block_end()); 298 299 BinaryContext &BC = BF.getBinaryContext(); 300 size_t OriginalHotSize; 301 size_t HotSize; 302 size_t ColdSize; 303 if (BC.isX86()) { 304 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF); 305 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 306 << " pre-split is <0x" 307 << Twine::utohexstr(OriginalHotSize) << ", 0x" 308 << Twine::utohexstr(ColdSize) << ">\n"); 309 } 310 311 BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(), 312 Layout.block_end()); 313 // Never outline the first basic block. 314 NewLayout.front()->setCanOutline(false); 315 for (BinaryBasicBlock *const BB : NewLayout) { 316 if (!BB->canOutline()) 317 continue; 318 319 // Do not split extra entry points in aarch64. They can be referred by 320 // using ADRs and when this happens, these blocks cannot be placed far 321 // away due to the limited range in ADR instruction. 322 if (BC.isAArch64() && BB->isEntryPoint()) { 323 BB->setCanOutline(false); 324 continue; 325 } 326 327 if (BF.hasEHRanges() && !opts::SplitEH) { 328 // We cannot move landing pads (or rather entry points for landing pads). 329 if (BB->isLandingPad()) { 330 BB->setCanOutline(false); 331 continue; 332 } 333 // We cannot move a block that can throw since exception-handling 334 // runtime cannot deal with split functions. However, if we can guarantee 335 // that the block never throws, it is safe to move the block to 336 // decrease the size of the function. 337 for (MCInst &Instr : *BB) { 338 if (BC.MIB->isInvoke(Instr)) { 339 BB->setCanOutline(false); 340 break; 341 } 342 } 343 } 344 } 345 346 BF.getLayout().updateLayoutIndices(); 347 S.fragment(NewLayout.begin(), NewLayout.end()); 348 349 // Make sure all non-outlineable blocks are in the main-fragment. 350 for (BinaryBasicBlock *const BB : NewLayout) { 351 if (!BB->canOutline()) 352 BB->setFragmentNum(FragmentNum::main()); 353 } 354 355 if (opts::AggressiveSplitting) { 356 // All blocks with 0 count that we can move go to the end of the function. 357 // Even if they were natural to cluster formation and were seen in-between 358 // hot basic blocks. 359 llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A, 360 const BinaryBasicBlock *const B) { 361 return A->getFragmentNum() < B->getFragmentNum(); 362 }); 363 } else if (BF.hasEHRanges() && !opts::SplitEH) { 364 // Typically functions with exception handling have landing pads at the end. 365 // We cannot move beginning of landing pads, but we can move 0-count blocks 366 // comprising landing pads to the end and thus facilitate splitting. 367 auto FirstLP = NewLayout.begin(); 368 while ((*FirstLP)->isLandingPad()) 369 ++FirstLP; 370 371 std::stable_sort(FirstLP, NewLayout.end(), 372 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 373 return A->getFragmentNum() < B->getFragmentNum(); 374 }); 375 } 376 377 // Make sure that fragments are increasing. 378 FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum(); 379 for (BinaryBasicBlock *const BB : reverse(NewLayout)) { 380 if (BB->getFragmentNum() > CurrentFragment) 381 BB->setFragmentNum(CurrentFragment); 382 CurrentFragment = BB->getFragmentNum(); 383 } 384 385 if (!S.keepEmpty()) { 386 FragmentNum CurrentFragment = FragmentNum::main(); 387 FragmentNum NewFragment = FragmentNum::main(); 388 for (BinaryBasicBlock *const BB : NewLayout) { 389 if (BB->getFragmentNum() > CurrentFragment) { 390 CurrentFragment = BB->getFragmentNum(); 391 NewFragment = FragmentNum(NewFragment.get() + 1); 392 } 393 BB->setFragmentNum(NewFragment); 394 } 395 } 396 397 BF.getLayout().update(NewLayout); 398 399 // For shared objects, invoke instructions and corresponding landing pads 400 // have to be placed in the same fragment. When we split them, create 401 // trampoline landing pads that will redirect the execution to real LPs. 402 TrampolineSetType Trampolines; 403 if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) 404 Trampolines = createEHTrampolines(BF); 405 406 // Check the new size to see if it's worth splitting the function. 407 if (BC.isX86() && BF.isSplit()) { 408 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); 409 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 410 << " post-split is <0x" << Twine::utohexstr(HotSize) 411 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); 412 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <= 413 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) { 414 if (opts::Verbosity >= 2) { 415 outs() << "BOLT-INFO: Reversing splitting of function " 416 << formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize, 417 ColdSize, OriginalHotSize); 418 } 419 420 // Reverse the action of createEHTrampolines(). The trampolines will be 421 // placed immediately before the matching destination resulting in no 422 // extra code. 423 if (PreSplitLayout.size() != BF.size()) 424 PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines); 425 426 for (BinaryBasicBlock &BB : BF) 427 BB.setFragmentNum(FragmentNum::main()); 428 BF.getLayout().update(PreSplitLayout); 429 } else { 430 SplitBytesHot += HotSize; 431 SplitBytesCold += ColdSize; 432 } 433 } 434 } 435 436 SplitFunctions::TrampolineSetType 437 SplitFunctions::createEHTrampolines(BinaryFunction &BF) const { 438 const auto &MIB = BF.getBinaryContext().MIB; 439 440 // Map real landing pads to the corresponding trampolines. 441 TrampolineSetType LPTrampolines; 442 443 // Iterate over the copy of basic blocks since we are adding new blocks to the 444 // function which will invalidate its iterators. 445 std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend()); 446 for (BinaryBasicBlock *BB : Blocks) { 447 for (MCInst &Instr : *BB) { 448 const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr); 449 if (!EHInfo || !EHInfo->first) 450 continue; 451 452 const MCSymbol *LPLabel = EHInfo->first; 453 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel); 454 if (BB->getFragmentNum() == LPBlock->getFragmentNum()) 455 continue; 456 457 const MCSymbol *TrampolineLabel = nullptr; 458 const TrampolineKey Key(BB->getFragmentNum(), LPLabel); 459 auto Iter = LPTrampolines.find(Key); 460 if (Iter != LPTrampolines.end()) { 461 TrampolineLabel = Iter->second; 462 } else { 463 // Create a trampoline basic block in the same fragment as the thrower. 464 // Note: there's no need to insert the jump instruction, it will be 465 // added by fixBranches(). 466 BinaryBasicBlock *TrampolineBB = BF.addBasicBlock(); 467 TrampolineBB->setFragmentNum(BB->getFragmentNum()); 468 TrampolineBB->setExecutionCount(LPBlock->getExecutionCount()); 469 TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount()); 470 TrampolineBB->setCFIState(LPBlock->getCFIState()); 471 TrampolineLabel = TrampolineBB->getLabel(); 472 LPTrampolines.insert(std::make_pair(Key, TrampolineLabel)); 473 } 474 475 // Substitute the landing pad with the trampoline. 476 MIB->updateEHInfo(Instr, 477 MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second)); 478 } 479 } 480 481 if (LPTrampolines.empty()) 482 return LPTrampolines; 483 484 // All trampoline blocks were added to the end of the function. Place them at 485 // the end of corresponding fragments. 486 BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(), 487 BF.getLayout().block_end()); 488 stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 489 return A->getFragmentNum() < B->getFragmentNum(); 490 }); 491 BF.getLayout().update(NewLayout); 492 493 // Conservatively introduce branch instructions. 494 BF.fixBranches(); 495 496 // Update exception-handling CFG for the function. 497 BF.recomputeLandingPads(); 498 499 return LPTrampolines; 500 } 501 502 SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines( 503 BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout, 504 const SplitFunctions::TrampolineSetType &Trampolines) const { 505 DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>> 506 IncomingTrampolines; 507 for (const auto &Entry : Trampolines) { 508 IncomingTrampolines[Entry.getFirst().Target].emplace_back( 509 Entry.getSecond()); 510 } 511 512 BasicBlockOrderType MergedLayout; 513 for (BinaryBasicBlock *BB : Layout) { 514 auto Iter = IncomingTrampolines.find(BB->getLabel()); 515 if (Iter != IncomingTrampolines.end()) { 516 for (const MCSymbol *const Trampoline : Iter->getSecond()) { 517 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline); 518 assert(LPBlock && "Could not find matching landing pad block."); 519 MergedLayout.push_back(LPBlock); 520 } 521 } 522 MergedLayout.push_back(BB); 523 } 524 525 return MergedLayout; 526 } 527 528 } // namespace bolt 529 } // namespace llvm 530