1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SplitFunctions pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/SplitFunctions.h" 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Core/FunctionLayout.h" 17 #include "bolt/Core/ParallelUtilities.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/Sequence.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/iterator_range.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/FormatVariadic.h" 24 #include <algorithm> 25 #include <iterator> 26 #include <memory> 27 #include <numeric> 28 #include <random> 29 #include <vector> 30 31 #define DEBUG_TYPE "bolt-opts" 32 33 using namespace llvm; 34 using namespace bolt; 35 36 namespace { 37 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> { 38 public: 39 explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) 40 : cl::parser<bool>(O) {} 41 42 bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { 43 if (Arg == "2" || Arg == "3") { 44 Value = true; 45 errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " 46 "for option -{1} is deprecated\n", 47 Arg, ArgName); 48 return false; 49 } 50 return cl::parser<bool>::parse(O, ArgName, Arg, Value); 51 } 52 }; 53 } // namespace 54 55 namespace opts { 56 57 extern cl::OptionCategory BoltOptCategory; 58 59 extern cl::opt<bool> SplitEH; 60 extern cl::opt<unsigned> ExecutionCountThreshold; 61 extern cl::opt<uint32_t> RandomSeed; 62 63 static cl::opt<bool> AggressiveSplitting( 64 "split-all-cold", cl::desc("outline as many cold basic blocks as possible"), 65 cl::cat(BoltOptCategory)); 66 67 static cl::opt<unsigned> SplitAlignThreshold( 68 "split-align-threshold", 69 cl::desc("when deciding to split a function, apply this alignment " 70 "while doing the size comparison (see -split-threshold). " 71 "Default value: 2."), 72 cl::init(2), 73 74 cl::Hidden, cl::cat(BoltOptCategory)); 75 76 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser> 77 SplitFunctions("split-functions", 78 cl::desc("split functions into fragments"), 79 cl::cat(BoltOptCategory)); 80 81 static cl::opt<unsigned> SplitThreshold( 82 "split-threshold", 83 cl::desc("split function only if its main size is reduced by more than " 84 "given amount of bytes. Default value: 0, i.e. split iff the " 85 "size is reduced. Note that on some architectures the size can " 86 "increase after splitting."), 87 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); 88 89 static cl::opt<SplitFunctionsStrategy> SplitStrategy( 90 "split-strategy", cl::init(SplitFunctionsStrategy::Profile2), 91 cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2", 92 "split each function into a hot and cold fragment " 93 "using profiling information")), 94 cl::values(clEnumValN( 95 SplitFunctionsStrategy::Random2, "random2", 96 "split each function into a hot and cold fragment at a randomly chosen " 97 "split point (ignoring any available profiling information)")), 98 cl::values(clEnumValN( 99 SplitFunctionsStrategy::RandomN, "randomN", 100 "split each function into N fragments at a randomly chosen split " 101 "points (ignoring any available profiling information)")), 102 cl::values(clEnumValN( 103 SplitFunctionsStrategy::All, "all", 104 "split all basic blocks of each function into fragments such that each " 105 "fragment contains exactly a single basic block")), 106 cl::desc("strategy used to partition blocks into fragments"), 107 cl::cat(BoltOptCategory)); 108 } // namespace opts 109 110 namespace { 111 bool hasFullProfile(const BinaryFunction &BF) { 112 return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) { 113 return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE; 114 }); 115 } 116 117 bool allBlocksCold(const BinaryFunction &BF) { 118 return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) { 119 return BB.getExecutionCount() == 0; 120 }); 121 } 122 123 struct SplitProfile2 final : public SplitStrategy { 124 bool canSplit(const BinaryFunction &BF) override { 125 return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF); 126 } 127 128 bool keepEmpty() override { return false; } 129 130 void fragment(const BlockIt Start, const BlockIt End) override { 131 for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) { 132 if (BB->getExecutionCount() == 0) 133 BB->setFragmentNum(FragmentNum::cold()); 134 } 135 } 136 }; 137 138 struct SplitRandom2 final : public SplitStrategy { 139 std::minstd_rand0 Gen; 140 141 SplitRandom2() : Gen(opts::RandomSeed.getValue()) {} 142 143 bool canSplit(const BinaryFunction &BF) override { return true; } 144 145 bool keepEmpty() override { return false; } 146 147 void fragment(const BlockIt Start, const BlockIt End) override { 148 using DiffT = typename std::iterator_traits<BlockIt>::difference_type; 149 const DiffT NumBlocks = End - Start; 150 assert(NumBlocks > 0 && "Cannot fragment empty function"); 151 152 // We want to split at least one block 153 const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1); 154 std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint); 155 const DiffT SplitPoint = Dist(Gen); 156 for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End)) 157 BB->setFragmentNum(FragmentNum::cold()); 158 159 LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of " 160 "{1} possible) blocks to split\n", 161 NumBlocks - SplitPoint, End - Start)); 162 } 163 }; 164 165 struct SplitRandomN final : public SplitStrategy { 166 std::minstd_rand0 Gen; 167 168 SplitRandomN() : Gen(opts::RandomSeed.getValue()) {} 169 170 bool canSplit(const BinaryFunction &BF) override { return true; } 171 172 bool keepEmpty() override { return false; } 173 174 void fragment(const BlockIt Start, const BlockIt End) override { 175 using DiffT = typename std::iterator_traits<BlockIt>::difference_type; 176 const DiffT NumBlocks = End - Start; 177 assert(NumBlocks > 0 && "Cannot fragment empty function"); 178 179 // With n blocks, there are n-1 places to split them. 180 const DiffT MaximumSplits = NumBlocks - 1; 181 // We want to generate at least two fragment if possible, but if there is 182 // only one block, no splits are possible. 183 const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1); 184 std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits); 185 // Choose how many splits to perform 186 const DiffT NumSplits = Dist(Gen); 187 188 // Draw split points from a lottery 189 SmallVector<unsigned, 0> Lottery(MaximumSplits); 190 // Start lottery at 1, because there is no meaningful splitpoint before the 191 // first block. 192 std::iota(Lottery.begin(), Lottery.end(), 1u); 193 std::shuffle(Lottery.begin(), Lottery.end(), Gen); 194 Lottery.resize(NumSplits); 195 llvm::sort(Lottery); 196 197 // Add one past the end entry to lottery 198 Lottery.push_back(NumBlocks); 199 200 unsigned LotteryIndex = 0; 201 unsigned BBPos = 0; 202 for (BinaryBasicBlock *const BB : make_range(Start, End)) { 203 // Check whether to start new fragment 204 if (BBPos >= Lottery[LotteryIndex]) 205 ++LotteryIndex; 206 207 // Because LotteryIndex is 0 based and cold fragments are 1 based, we can 208 // use the index to assign fragments. 209 BB->setFragmentNum(FragmentNum(LotteryIndex)); 210 211 ++BBPos; 212 } 213 } 214 }; 215 216 struct SplitAll final : public SplitStrategy { 217 bool canSplit(const BinaryFunction &BF) override { return true; } 218 219 bool keepEmpty() override { 220 // Keeping empty fragments allows us to test, that empty fragments do not 221 // generate symbols. 222 return true; 223 } 224 225 void fragment(const BlockIt Start, const BlockIt End) override { 226 unsigned Fragment = 0; 227 for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) 228 BB->setFragmentNum(FragmentNum(Fragment++)); 229 } 230 }; 231 } // namespace 232 233 namespace llvm { 234 namespace bolt { 235 236 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const { 237 // Apply execution count threshold 238 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) 239 return false; 240 241 return BinaryFunctionPass::shouldOptimize(BF); 242 } 243 244 void SplitFunctions::runOnFunctions(BinaryContext &BC) { 245 if (!opts::SplitFunctions) 246 return; 247 248 std::unique_ptr<SplitStrategy> Strategy; 249 bool ForceSequential = false; 250 251 switch (opts::SplitStrategy) { 252 case SplitFunctionsStrategy::Profile2: 253 Strategy = std::make_unique<SplitProfile2>(); 254 break; 255 case SplitFunctionsStrategy::Random2: 256 Strategy = std::make_unique<SplitRandom2>(); 257 // If we split functions randomly, we need to ensure that across runs with 258 // the same input, we generate random numbers for each function in the same 259 // order. 260 ForceSequential = true; 261 break; 262 case SplitFunctionsStrategy::RandomN: 263 Strategy = std::make_unique<SplitRandomN>(); 264 ForceSequential = true; 265 break; 266 case SplitFunctionsStrategy::All: 267 Strategy = std::make_unique<SplitAll>(); 268 break; 269 } 270 271 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 272 return !shouldOptimize(BF); 273 }; 274 275 ParallelUtilities::runOnEachFunction( 276 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, 277 [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc, 278 "SplitFunctions", ForceSequential); 279 280 if (SplitBytesHot + SplitBytesCold > 0) 281 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot 282 << " hot bytes from " << SplitBytesCold << " cold bytes " 283 << format("(%.2lf%% of split functions is hot).\n", 284 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); 285 } 286 287 void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { 288 if (BF.empty()) 289 return; 290 291 if (!S.canSplit(BF)) 292 return; 293 294 FunctionLayout &Layout = BF.getLayout(); 295 BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(), 296 Layout.block_end()); 297 298 BinaryContext &BC = BF.getBinaryContext(); 299 size_t OriginalHotSize; 300 size_t HotSize; 301 size_t ColdSize; 302 if (BC.isX86()) { 303 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF); 304 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 305 << " pre-split is <0x" 306 << Twine::utohexstr(OriginalHotSize) << ", 0x" 307 << Twine::utohexstr(ColdSize) << ">\n"); 308 } 309 310 BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(), 311 Layout.block_end()); 312 // Never outline the first basic block. 313 NewLayout.front()->setCanOutline(false); 314 for (BinaryBasicBlock *const BB : NewLayout) { 315 if (!BB->canOutline()) 316 continue; 317 318 // Do not split extra entry points in aarch64. They can be referred by 319 // using ADRs and when this happens, these blocks cannot be placed far 320 // away due to the limited range in ADR instruction. 321 if (BC.isAArch64() && BB->isEntryPoint()) { 322 BB->setCanOutline(false); 323 continue; 324 } 325 326 if (BF.hasEHRanges() && !opts::SplitEH) { 327 // We cannot move landing pads (or rather entry points for landing pads). 328 if (BB->isLandingPad()) { 329 BB->setCanOutline(false); 330 continue; 331 } 332 // We cannot move a block that can throw since exception-handling 333 // runtime cannot deal with split functions. However, if we can guarantee 334 // that the block never throws, it is safe to move the block to 335 // decrease the size of the function. 336 for (MCInst &Instr : *BB) { 337 if (BC.MIB->isInvoke(Instr)) { 338 BB->setCanOutline(false); 339 break; 340 } 341 } 342 } 343 } 344 345 BF.getLayout().updateLayoutIndices(); 346 S.fragment(NewLayout.begin(), NewLayout.end()); 347 348 // Make sure all non-outlineable blocks are in the main-fragment. 349 for (BinaryBasicBlock *const BB : NewLayout) { 350 if (!BB->canOutline()) 351 BB->setFragmentNum(FragmentNum::main()); 352 } 353 354 if (opts::AggressiveSplitting) { 355 // All blocks with 0 count that we can move go to the end of the function. 356 // Even if they were natural to cluster formation and were seen in-between 357 // hot basic blocks. 358 llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A, 359 const BinaryBasicBlock *const B) { 360 return A->getFragmentNum() < B->getFragmentNum(); 361 }); 362 } else if (BF.hasEHRanges() && !opts::SplitEH) { 363 // Typically functions with exception handling have landing pads at the end. 364 // We cannot move beginning of landing pads, but we can move 0-count blocks 365 // comprising landing pads to the end and thus facilitate splitting. 366 auto FirstLP = NewLayout.begin(); 367 while ((*FirstLP)->isLandingPad()) 368 ++FirstLP; 369 370 std::stable_sort(FirstLP, NewLayout.end(), 371 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 372 return A->getFragmentNum() < B->getFragmentNum(); 373 }); 374 } 375 376 // Make sure that fragments are increasing. 377 FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum(); 378 for (BinaryBasicBlock *const BB : reverse(NewLayout)) { 379 if (BB->getFragmentNum() > CurrentFragment) 380 BB->setFragmentNum(CurrentFragment); 381 CurrentFragment = BB->getFragmentNum(); 382 } 383 384 if (!S.keepEmpty()) { 385 FragmentNum CurrentFragment = FragmentNum::main(); 386 FragmentNum NewFragment = FragmentNum::main(); 387 for (BinaryBasicBlock *const BB : NewLayout) { 388 if (BB->getFragmentNum() > CurrentFragment) { 389 CurrentFragment = BB->getFragmentNum(); 390 NewFragment = FragmentNum(NewFragment.get() + 1); 391 } 392 BB->setFragmentNum(NewFragment); 393 } 394 } 395 396 BF.getLayout().update(NewLayout); 397 398 // For shared objects, invoke instructions and corresponding landing pads 399 // have to be placed in the same fragment. When we split them, create 400 // trampoline landing pads that will redirect the execution to real LPs. 401 TrampolineSetType Trampolines; 402 if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) 403 Trampolines = createEHTrampolines(BF); 404 405 // Check the new size to see if it's worth splitting the function. 406 if (BC.isX86() && BF.isSplit()) { 407 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); 408 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 409 << " post-split is <0x" << Twine::utohexstr(HotSize) 410 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); 411 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <= 412 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) { 413 LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n 0x" 414 << Twine::utohexstr(HotSize) << ", 0x" 415 << Twine::utohexstr(ColdSize) << " -> 0x" 416 << Twine::utohexstr(OriginalHotSize) << '\n'); 417 418 // Reverse the action of createEHTrampolines(). The trampolines will be 419 // placed immediately before the matching destination resulting in no 420 // extra code. 421 if (PreSplitLayout.size() != BF.size()) 422 PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines); 423 424 for (BinaryBasicBlock &BB : BF) 425 BB.setFragmentNum(FragmentNum::main()); 426 BF.getLayout().update(PreSplitLayout); 427 } else { 428 SplitBytesHot += HotSize; 429 SplitBytesCold += ColdSize; 430 } 431 } 432 } 433 434 SplitFunctions::TrampolineSetType 435 SplitFunctions::createEHTrampolines(BinaryFunction &BF) const { 436 const auto &MIB = BF.getBinaryContext().MIB; 437 438 // Map real landing pads to the corresponding trampolines. 439 TrampolineSetType LPTrampolines; 440 441 // Iterate over the copy of basic blocks since we are adding new blocks to the 442 // function which will invalidate its iterators. 443 std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend()); 444 for (BinaryBasicBlock *BB : Blocks) { 445 for (MCInst &Instr : *BB) { 446 const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr); 447 if (!EHInfo || !EHInfo->first) 448 continue; 449 450 const MCSymbol *LPLabel = EHInfo->first; 451 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel); 452 if (BB->getFragmentNum() == LPBlock->getFragmentNum()) 453 continue; 454 455 const MCSymbol *TrampolineLabel = nullptr; 456 const TrampolineKey Key(BB->getFragmentNum(), LPLabel); 457 auto Iter = LPTrampolines.find(Key); 458 if (Iter != LPTrampolines.end()) { 459 TrampolineLabel = Iter->second; 460 } else { 461 // Create a trampoline basic block in the same fragment as the thrower. 462 // Note: there's no need to insert the jump instruction, it will be 463 // added by fixBranches(). 464 BinaryBasicBlock *TrampolineBB = BF.addBasicBlock(); 465 TrampolineBB->setFragmentNum(BB->getFragmentNum()); 466 TrampolineBB->setExecutionCount(LPBlock->getExecutionCount()); 467 TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount()); 468 TrampolineBB->setCFIState(LPBlock->getCFIState()); 469 TrampolineLabel = TrampolineBB->getLabel(); 470 LPTrampolines.insert(std::make_pair(Key, TrampolineLabel)); 471 } 472 473 // Substitute the landing pad with the trampoline. 474 MIB->updateEHInfo(Instr, 475 MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second)); 476 } 477 } 478 479 if (LPTrampolines.empty()) 480 return LPTrampolines; 481 482 // All trampoline blocks were added to the end of the function. Place them at 483 // the end of corresponding fragments. 484 BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(), 485 BF.getLayout().block_end()); 486 stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 487 return A->getFragmentNum() < B->getFragmentNum(); 488 }); 489 BF.getLayout().update(NewLayout); 490 491 // Conservatively introduce branch instructions. 492 BF.fixBranches(); 493 494 // Update exception-handling CFG for the function. 495 BF.recomputeLandingPads(); 496 497 return LPTrampolines; 498 } 499 500 SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines( 501 BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout, 502 const SplitFunctions::TrampolineSetType &Trampolines) const { 503 DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>> 504 IncomingTrampolines; 505 for (const auto &Entry : Trampolines) { 506 IncomingTrampolines[Entry.getFirst().Target].emplace_back( 507 Entry.getSecond()); 508 } 509 510 BasicBlockOrderType MergedLayout; 511 for (BinaryBasicBlock *BB : Layout) { 512 auto Iter = IncomingTrampolines.find(BB->getLabel()); 513 if (Iter != IncomingTrampolines.end()) { 514 for (const MCSymbol *const Trampoline : Iter->getSecond()) { 515 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline); 516 assert(LPBlock && "Could not find matching landing pad block."); 517 MergedLayout.push_back(LPBlock); 518 } 519 } 520 MergedLayout.push_back(BB); 521 } 522 523 return MergedLayout; 524 } 525 526 } // namespace bolt 527 } // namespace llvm 528