12f09f445SMaksim Panchenko //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // This file implements the SplitFunctions pass. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Passes/SplitFunctions.h" 1448ff38ceSFabian Parzefall #include "bolt/Core/BinaryBasicBlock.h" 15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 16f428db7aSFabian Parzefall #include "bolt/Core/FunctionLayout.h" 17a34c753fSRafael Auler #include "bolt/Core/ParallelUtilities.h" 18c30ab9dcSAmir Ayupov #include "bolt/Utils/CommandLineOpts.h" 1948ff38ceSFabian Parzefall #include "llvm/ADT/STLExtras.h" 2048ff38ceSFabian Parzefall #include "llvm/ADT/SmallVector.h" 2148ff38ceSFabian Parzefall #include "llvm/ADT/iterator_range.h" 22a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 2396f6ec50SFabian Parzefall #include "llvm/Support/FormatVariadic.h" 24e341e9f0SFabian Parzefall #include <algorithm> 25f428db7aSFabian Parzefall #include <iterator> 264fdbe985SFabian Parzefall #include <memory> 2748ff38ceSFabian Parzefall #include <numeric> 28e341e9f0SFabian Parzefall #include <random> 29a34c753fSRafael Auler #include <vector> 30a34c753fSRafael Auler 31a34c753fSRafael Auler #define DEBUG_TYPE "bolt-opts" 32a34c753fSRafael Auler 33a34c753fSRafael Auler using namespace llvm; 34a34c753fSRafael Auler using namespace bolt; 35a34c753fSRafael Auler 3696f6ec50SFabian Parzefall namespace { 3796f6ec50SFabian Parzefall class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> { 3896f6ec50SFabian Parzefall public: 3996f6ec50SFabian Parzefall explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) 4096f6ec50SFabian Parzefall : cl::parser<bool>(O) {} 4196f6ec50SFabian Parzefall 4296f6ec50SFabian Parzefall bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { 4396f6ec50SFabian Parzefall if (Arg == "2" || Arg == "3") { 4496f6ec50SFabian Parzefall Value = true; 4596f6ec50SFabian Parzefall errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " 4696f6ec50SFabian Parzefall "for option -{1} is deprecated\n", 4796f6ec50SFabian Parzefall Arg, ArgName); 4896f6ec50SFabian Parzefall return false; 4996f6ec50SFabian Parzefall } 5096f6ec50SFabian Parzefall return cl::parser<bool>::parse(O, ArgName, Arg, Value); 5196f6ec50SFabian Parzefall } 5296f6ec50SFabian Parzefall }; 5396f6ec50SFabian Parzefall } // namespace 5496f6ec50SFabian Parzefall 55a34c753fSRafael Auler namespace opts { 56a34c753fSRafael Auler 57a34c753fSRafael Auler extern cl::OptionCategory BoltOptCategory; 58a34c753fSRafael Auler 59a34c753fSRafael Auler extern cl::opt<bool> SplitEH; 60a34c753fSRafael Auler extern cl::opt<unsigned> ExecutionCountThreshold; 61e341e9f0SFabian Parzefall extern cl::opt<uint32_t> RandomSeed; 62a34c753fSRafael Auler 63b92436efSFangrui Song static cl::opt<bool> AggressiveSplitting( 64b92436efSFangrui Song "split-all-cold", cl::desc("outline as many cold basic blocks as possible"), 65a34c753fSRafael Auler cl::cat(BoltOptCategory)); 66a34c753fSRafael Auler 67b92436efSFangrui Song static cl::opt<unsigned> SplitAlignThreshold( 68b92436efSFangrui Song "split-align-threshold", 69a34c753fSRafael Auler cl::desc("when deciding to split a function, apply this alignment " 70a34c753fSRafael Auler "while doing the size comparison (see -split-threshold). " 71a34c753fSRafael Auler "Default value: 2."), 72a34c753fSRafael Auler cl::init(2), 73b92436efSFangrui Song 74b92436efSFangrui Song cl::Hidden, cl::cat(BoltOptCategory)); 75a34c753fSRafael Auler 7696f6ec50SFabian Parzefall static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser> 77a34c753fSRafael Auler SplitFunctions("split-functions", 78f428db7aSFabian Parzefall cl::desc("split functions into fragments"), 79a34c753fSRafael Auler cl::cat(BoltOptCategory)); 80a34c753fSRafael Auler 81b92436efSFangrui Song static cl::opt<unsigned> SplitThreshold( 82b92436efSFangrui Song "split-threshold", 83a34c753fSRafael Auler cl::desc("split function only if its main size is reduced by more than " 84a34c753fSRafael Auler "given amount of bytes. Default value: 0, i.e. split iff the " 85a34c753fSRafael Auler "size is reduced. Note that on some architectures the size can " 86a34c753fSRafael Auler "increase after splitting."), 87b92436efSFangrui Song cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); 88a34c753fSRafael Auler 89f428db7aSFabian Parzefall static cl::opt<SplitFunctionsStrategy> SplitStrategy( 90f428db7aSFabian Parzefall "split-strategy", cl::init(SplitFunctionsStrategy::Profile2), 91f428db7aSFabian Parzefall cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2", 92f428db7aSFabian Parzefall "split each function into a hot and cold fragment " 93f428db7aSFabian Parzefall "using profiling information")), 94076bd22fSShatianWang cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit", 95076bd22fSShatianWang "split each function into a hot, warm, and cold " 96076bd22fSShatianWang "fragment using profiling information")), 97f428db7aSFabian Parzefall cl::values(clEnumValN( 98f428db7aSFabian Parzefall SplitFunctionsStrategy::Random2, "random2", 99f428db7aSFabian Parzefall "split each function into a hot and cold fragment at a randomly chosen " 100f428db7aSFabian Parzefall "split point (ignoring any available profiling information)")), 101f428db7aSFabian Parzefall cl::values(clEnumValN( 10248ff38ceSFabian Parzefall SplitFunctionsStrategy::RandomN, "randomN", 10348ff38ceSFabian Parzefall "split each function into N fragments at a randomly chosen split " 10448ff38ceSFabian Parzefall "points (ignoring any available profiling information)")), 10548ff38ceSFabian Parzefall cl::values(clEnumValN( 106f428db7aSFabian Parzefall SplitFunctionsStrategy::All, "all", 107f428db7aSFabian Parzefall "split all basic blocks of each function into fragments such that each " 108f428db7aSFabian Parzefall "fragment contains exactly a single basic block")), 109f428db7aSFabian Parzefall cl::desc("strategy used to partition blocks into fragments"), 110f428db7aSFabian Parzefall cl::cat(BoltOptCategory)); 11156bbf813SShatianWang 11256bbf813SShatianWang static cl::opt<double> CallScale( 11356bbf813SShatianWang "call-scale", 11456bbf813SShatianWang cl::desc("Call score scale coefficient (when --split-strategy=cdsplit)"), 11556bbf813SShatianWang cl::init(0.95), cl::ReallyHidden, cl::cat(BoltOptCategory)); 1164483cf2dSShatianWang 1174483cf2dSShatianWang static cl::opt<double> 1184483cf2dSShatianWang CallPower("call-power", 1194483cf2dSShatianWang cl::desc("Call score power (when --split-strategy=cdsplit)"), 1204483cf2dSShatianWang cl::init(0.05), cl::ReallyHidden, cl::cat(BoltOptCategory)); 1214483cf2dSShatianWang 1224483cf2dSShatianWang static cl::opt<double> 1234483cf2dSShatianWang JumpPower("jump-power", 1244483cf2dSShatianWang cl::desc("Jump score power (when --split-strategy=cdsplit)"), 1254483cf2dSShatianWang cl::init(0.15), cl::ReallyHidden, cl::cat(BoltOptCategory)); 126a34c753fSRafael Auler } // namespace opts 127a34c753fSRafael Auler 128e341e9f0SFabian Parzefall namespace { 1294fdbe985SFabian Parzefall bool hasFullProfile(const BinaryFunction &BF) { 1304fdbe985SFabian Parzefall return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) { 1314fdbe985SFabian Parzefall return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE; 1324fdbe985SFabian Parzefall }); 133e341e9f0SFabian Parzefall } 134e341e9f0SFabian Parzefall 1354fdbe985SFabian Parzefall bool allBlocksCold(const BinaryFunction &BF) { 1364fdbe985SFabian Parzefall return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) { 1374fdbe985SFabian Parzefall return BB.getExecutionCount() == 0; 1384fdbe985SFabian Parzefall }); 139e341e9f0SFabian Parzefall } 140e341e9f0SFabian Parzefall 1414fdbe985SFabian Parzefall struct SplitProfile2 final : public SplitStrategy { 1424fdbe985SFabian Parzefall bool canSplit(const BinaryFunction &BF) override { 1434fdbe985SFabian Parzefall return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF); 1444fdbe985SFabian Parzefall } 1454fdbe985SFabian Parzefall 146076bd22fSShatianWang bool compactFragments() override { return true; } 147e341e9f0SFabian Parzefall 1484fdbe985SFabian Parzefall void fragment(const BlockIt Start, const BlockIt End) override { 149a0c7ca8aSKazu Hirata for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) { 150ae2b4da1SFabian Parzefall if (BB->getExecutionCount() == 0) 151f428db7aSFabian Parzefall BB->setFragmentNum(FragmentNum::cold()); 152a0c7ca8aSKazu Hirata } 153e341e9f0SFabian Parzefall } 154e341e9f0SFabian Parzefall }; 155e341e9f0SFabian Parzefall 156076bd22fSShatianWang struct SplitCacheDirected final : public SplitStrategy { 15756bbf813SShatianWang BinaryContext &BC; 158076bd22fSShatianWang using BasicBlockOrder = BinaryFunction::BasicBlockOrderType; 159076bd22fSShatianWang 160076bd22fSShatianWang bool canSplit(const BinaryFunction &BF) override { 161076bd22fSShatianWang return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF); 162076bd22fSShatianWang } 163076bd22fSShatianWang 16456bbf813SShatianWang explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) { 16556bbf813SShatianWang initializeAuxiliaryVariables(); 16656bbf813SShatianWang buildCallGraph(); 16756bbf813SShatianWang } 16856bbf813SShatianWang 169076bd22fSShatianWang // When some functions are hot-warm split and others are hot-warm-cold split, 170076bd22fSShatianWang // we do not want to change the fragment numbers of the blocks in the hot-warm 171076bd22fSShatianWang // split functions. 172076bd22fSShatianWang bool compactFragments() override { return false; } 173076bd22fSShatianWang 174076bd22fSShatianWang void fragment(const BlockIt Start, const BlockIt End) override { 175076bd22fSShatianWang BasicBlockOrder BlockOrder(Start, End); 176076bd22fSShatianWang BinaryFunction &BF = *BlockOrder.front()->getFunction(); 17715774834SShatianWang // No need to re-split small functions. 17815774834SShatianWang if (BlockOrder.size() <= 2) 17915774834SShatianWang return; 180076bd22fSShatianWang 181076bd22fSShatianWang size_t BestSplitIndex = findSplitIndex(BF, BlockOrder); 18215774834SShatianWang assert(BestSplitIndex < BlockOrder.size()); 183076bd22fSShatianWang 184076bd22fSShatianWang // Assign fragments based on the computed best split index. 185076bd22fSShatianWang // All basic blocks with index up to the best split index become hot. 186076bd22fSShatianWang // All remaining blocks are warm / cold depending on if count is 187c43d0432SShatianWang // greater than zero or not. 188076bd22fSShatianWang for (size_t Index = 0; Index < BlockOrder.size(); Index++) { 189076bd22fSShatianWang BinaryBasicBlock *BB = BlockOrder[Index]; 190076bd22fSShatianWang if (Index <= BestSplitIndex) 191c43d0432SShatianWang BB->setFragmentNum(FragmentNum::main()); 192076bd22fSShatianWang else 193c43d0432SShatianWang BB->setFragmentNum(BB->getKnownExecutionCount() > 0 194c43d0432SShatianWang ? FragmentNum::warm() 195c43d0432SShatianWang : FragmentNum::cold()); 196076bd22fSShatianWang } 197076bd22fSShatianWang } 198076bd22fSShatianWang 199076bd22fSShatianWang private: 20056bbf813SShatianWang struct CallInfo { 20156bbf813SShatianWang size_t Length; 20256bbf813SShatianWang size_t Count; 20356bbf813SShatianWang }; 20456bbf813SShatianWang 2054483cf2dSShatianWang struct SplitScore { 20615774834SShatianWang size_t SplitIndex = size_t(-1); 2074483cf2dSShatianWang size_t HotSizeReduction = 0; 2084483cf2dSShatianWang double LocalScore = 0; 2094483cf2dSShatianWang double CoverCallScore = 0; 21015774834SShatianWang 21115774834SShatianWang double sum() const { return LocalScore + CoverCallScore; } 2124483cf2dSShatianWang }; 2134483cf2dSShatianWang 21456bbf813SShatianWang // Auxiliary variables used by the algorithm. 21556bbf813SShatianWang size_t TotalNumBlocks{0}; 21656bbf813SShatianWang size_t OrigHotSectionSize{0}; 21756bbf813SShatianWang DenseMap<const BinaryBasicBlock *, size_t> GlobalIndices; 21856bbf813SShatianWang DenseMap<const BinaryBasicBlock *, size_t> BBSizes; 21956bbf813SShatianWang DenseMap<const BinaryBasicBlock *, size_t> BBOffsets; 22056bbf813SShatianWang 22156bbf813SShatianWang // Call graph. 22256bbf813SShatianWang std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callers; 22356bbf813SShatianWang std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callees; 22456bbf813SShatianWang 22556bbf813SShatianWang bool shouldConsiderForCallGraph(const BinaryFunction &BF) { 22656bbf813SShatianWang // Only a subset of the functions in the binary will be considered 22756bbf813SShatianWang // for initializing auxiliary variables and building call graph. 22856bbf813SShatianWang return BF.hasValidIndex() && BF.hasValidProfile() && !BF.empty(); 22956bbf813SShatianWang } 23056bbf813SShatianWang 23156bbf813SShatianWang void initializeAuxiliaryVariables() { 23256bbf813SShatianWang for (BinaryFunction *BF : BC.getSortedFunctions()) { 23356bbf813SShatianWang if (!shouldConsiderForCallGraph(*BF)) 23456bbf813SShatianWang continue; 23556bbf813SShatianWang 23656bbf813SShatianWang // Calculate the size of each BB after hot-cold splitting. 23756bbf813SShatianWang // This populates BinaryBasicBlock::OutputAddressRange which 23856bbf813SShatianWang // can be used to compute the size of each BB. 23956bbf813SShatianWang BC.calculateEmittedSize(*BF, /*FixBranches=*/true); 24056bbf813SShatianWang 24156bbf813SShatianWang for (BinaryBasicBlock *BB : BF->getLayout().blocks()) { 24256bbf813SShatianWang // Unique global index. 24356bbf813SShatianWang GlobalIndices[BB] = TotalNumBlocks; 24456bbf813SShatianWang TotalNumBlocks++; 24556bbf813SShatianWang 24656bbf813SShatianWang // Block size after hot-cold splitting. 24756bbf813SShatianWang BBSizes[BB] = BB->getOutputSize(); 24856bbf813SShatianWang 24956bbf813SShatianWang // Hot block offset after hot-cold splitting. 25056bbf813SShatianWang BBOffsets[BB] = OrigHotSectionSize; 25156bbf813SShatianWang if (!BB->isSplit()) 25256bbf813SShatianWang OrigHotSectionSize += BBSizes[BB]; 25356bbf813SShatianWang } 25456bbf813SShatianWang } 25556bbf813SShatianWang } 25656bbf813SShatianWang 25756bbf813SShatianWang void buildCallGraph() { 25856bbf813SShatianWang Callers.resize(TotalNumBlocks); 25956bbf813SShatianWang Callees.resize(TotalNumBlocks); 26056bbf813SShatianWang for (const BinaryFunction *SrcFunction : BC.getSortedFunctions()) { 26156bbf813SShatianWang if (!shouldConsiderForCallGraph(*SrcFunction)) 26256bbf813SShatianWang continue; 26356bbf813SShatianWang 26456bbf813SShatianWang for (BinaryBasicBlock &SrcBB : SrcFunction->blocks()) { 26556bbf813SShatianWang // Skip blocks that are not executed 26656bbf813SShatianWang if (SrcBB.getKnownExecutionCount() == 0) 26756bbf813SShatianWang continue; 26856bbf813SShatianWang 26956bbf813SShatianWang // Find call instructions and extract target symbols from each one 27056bbf813SShatianWang for (const MCInst &Inst : SrcBB) { 27156bbf813SShatianWang if (!BC.MIB->isCall(Inst)) 27256bbf813SShatianWang continue; 27356bbf813SShatianWang 27456bbf813SShatianWang // Call info 27556bbf813SShatianWang const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst); 27656bbf813SShatianWang // Ignore calls w/o information 27756bbf813SShatianWang if (!DstSym) 27856bbf813SShatianWang continue; 27956bbf813SShatianWang 28056bbf813SShatianWang const BinaryFunction *DstFunction = BC.getFunctionForSymbol(DstSym); 28156bbf813SShatianWang // Ignore calls that do not have a valid target, but do not ignore 28256bbf813SShatianWang // recursive calls, because caller block could be moved to warm. 28356bbf813SShatianWang if (!DstFunction || DstFunction->getLayout().block_empty()) 28456bbf813SShatianWang continue; 28556bbf813SShatianWang 28656bbf813SShatianWang const BinaryBasicBlock *DstBB = &(DstFunction->front()); 28756bbf813SShatianWang 28856bbf813SShatianWang // Record the call only if DstBB is also in functions to consider for 28956bbf813SShatianWang // call graph. 29056bbf813SShatianWang if (GlobalIndices.contains(DstBB)) { 29156bbf813SShatianWang Callers[GlobalIndices[DstBB]].push_back(&SrcBB); 29256bbf813SShatianWang Callees[GlobalIndices[&SrcBB]].push_back(DstBB); 29356bbf813SShatianWang } 29456bbf813SShatianWang } 29556bbf813SShatianWang } 29656bbf813SShatianWang } 29756bbf813SShatianWang } 29856bbf813SShatianWang 29956bbf813SShatianWang /// Populate BinaryBasicBlock::OutputAddressRange with estimated basic block 30056bbf813SShatianWang /// start and end addresses for hot and warm basic blocks, assuming hot-warm 30156bbf813SShatianWang /// splitting happens at \p SplitIndex. Also return estimated end addresses 30256bbf813SShatianWang /// of the hot fragment before and after splitting. 30356bbf813SShatianWang /// The estimations take into account the potential addition of branch 30456bbf813SShatianWang /// instructions due to split fall through branches as well as the need to 30556bbf813SShatianWang /// use longer branch instructions for split (un)conditional branches. 30656bbf813SShatianWang std::pair<size_t, size_t> 30756bbf813SShatianWang estimatePostSplitBBAddress(const BasicBlockOrder &BlockOrder, 30856bbf813SShatianWang const size_t SplitIndex) { 30956bbf813SShatianWang assert(SplitIndex < BlockOrder.size() && "Invalid split index"); 31056bbf813SShatianWang 31115774834SShatianWang // Update function layout assuming hot-warm splitting at SplitIndex. 31256bbf813SShatianWang for (size_t Index = 0; Index < BlockOrder.size(); Index++) { 31356bbf813SShatianWang BinaryBasicBlock *BB = BlockOrder[Index]; 31456bbf813SShatianWang if (BB->getFragmentNum() == FragmentNum::cold()) 31556bbf813SShatianWang break; 31656bbf813SShatianWang BB->setFragmentNum(Index <= SplitIndex ? FragmentNum::main() 31756bbf813SShatianWang : FragmentNum::warm()); 31856bbf813SShatianWang } 31956bbf813SShatianWang BinaryFunction *BF = BlockOrder[0]->getFunction(); 32056bbf813SShatianWang BF->getLayout().update(BlockOrder); 32156bbf813SShatianWang // Populate BB.OutputAddressRange under the updated layout. 32256bbf813SShatianWang BC.calculateEmittedSize(*BF); 32356bbf813SShatianWang 32456bbf813SShatianWang // Populate BB.OutputAddressRange with estimated new start and end addresses 32556bbf813SShatianWang // and compute the old end address of the hot section and the new end 32656bbf813SShatianWang // address of the hot section. 32715774834SShatianWang size_t OldHotEndAddr{0}; 32815774834SShatianWang size_t NewHotEndAddr{0}; 32956bbf813SShatianWang size_t CurrentAddr = BBOffsets[BlockOrder[0]]; 33056bbf813SShatianWang for (BinaryBasicBlock *BB : BlockOrder) { 33156bbf813SShatianWang // We only care about new addresses of blocks in hot/warm. 33256bbf813SShatianWang if (BB->getFragmentNum() == FragmentNum::cold()) 33356bbf813SShatianWang break; 3344483cf2dSShatianWang const size_t NewSize = BB->getOutputSize(); 33556bbf813SShatianWang BB->setOutputStartAddress(CurrentAddr); 3364483cf2dSShatianWang CurrentAddr += NewSize; 33756bbf813SShatianWang BB->setOutputEndAddress(CurrentAddr); 33856bbf813SShatianWang if (BB->getLayoutIndex() == SplitIndex) { 33956bbf813SShatianWang NewHotEndAddr = CurrentAddr; 34056bbf813SShatianWang // Approximate the start address of the warm fragment of the current 34156bbf813SShatianWang // function using the original hot section size. 34256bbf813SShatianWang CurrentAddr = OrigHotSectionSize; 34356bbf813SShatianWang } 34456bbf813SShatianWang OldHotEndAddr = BBOffsets[BB] + BBSizes[BB]; 34556bbf813SShatianWang } 34656bbf813SShatianWang return std::make_pair(OldHotEndAddr, NewHotEndAddr); 34756bbf813SShatianWang } 34856bbf813SShatianWang 34956bbf813SShatianWang /// Get a collection of "shortenable" calls, that is, calls of type X->Y 35056bbf813SShatianWang /// when the function order is [... X ... BF ... Y ...]. 35156bbf813SShatianWang /// If the hot fragment size of BF is reduced, then such calls are guaranteed 35256bbf813SShatianWang /// to get shorter by the reduced hot fragment size. 35356bbf813SShatianWang std::vector<CallInfo> extractCoverCalls(const BinaryFunction &BF) { 35456bbf813SShatianWang // Record the length and the count of the calls that can be shortened 35556bbf813SShatianWang std::vector<CallInfo> CoverCalls; 35656bbf813SShatianWang if (opts::CallScale == 0) 35756bbf813SShatianWang return CoverCalls; 35856bbf813SShatianWang 35956bbf813SShatianWang const BinaryFunction *ThisBF = &BF; 36056bbf813SShatianWang const BinaryBasicBlock *ThisBB = &(ThisBF->front()); 36156bbf813SShatianWang const size_t ThisGI = GlobalIndices[ThisBB]; 36256bbf813SShatianWang 36356bbf813SShatianWang for (const BinaryFunction *DstBF : BC.getSortedFunctions()) { 36456bbf813SShatianWang if (!shouldConsiderForCallGraph(*DstBF)) 36556bbf813SShatianWang continue; 36656bbf813SShatianWang 36756bbf813SShatianWang const BinaryBasicBlock *DstBB = &(DstBF->front()); 36856bbf813SShatianWang if (DstBB->getKnownExecutionCount() == 0) 36956bbf813SShatianWang continue; 37056bbf813SShatianWang 37156bbf813SShatianWang const size_t DstGI = GlobalIndices[DstBB]; 37256bbf813SShatianWang for (const BinaryBasicBlock *SrcBB : Callers[DstGI]) { 37356bbf813SShatianWang const BinaryFunction *SrcBF = SrcBB->getFunction(); 37456bbf813SShatianWang if (ThisBF == SrcBF) 37556bbf813SShatianWang continue; 37656bbf813SShatianWang 37756bbf813SShatianWang const size_t CallCount = SrcBB->getKnownExecutionCount(); 37856bbf813SShatianWang 37956bbf813SShatianWang const size_t SrcGI = GlobalIndices[SrcBB]; 38056bbf813SShatianWang 38156bbf813SShatianWang const bool IsCoverCall = (SrcGI < ThisGI && ThisGI < DstGI) || 38256bbf813SShatianWang (DstGI <= ThisGI && ThisGI < SrcGI); 38356bbf813SShatianWang if (!IsCoverCall) 38456bbf813SShatianWang continue; 38556bbf813SShatianWang 38656bbf813SShatianWang const size_t SrcBBEndAddr = BBOffsets[SrcBB] + BBSizes[SrcBB]; 38756bbf813SShatianWang const size_t DstBBStartAddr = BBOffsets[DstBB]; 38856bbf813SShatianWang const size_t CallLength = 38956bbf813SShatianWang AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr); 39056bbf813SShatianWang const CallInfo CI{CallLength, CallCount}; 39156bbf813SShatianWang CoverCalls.emplace_back(CI); 39256bbf813SShatianWang } 39356bbf813SShatianWang } 39456bbf813SShatianWang return CoverCalls; 39556bbf813SShatianWang } 39656bbf813SShatianWang 3974483cf2dSShatianWang /// Compute the edge score of a call edge. 3984483cf2dSShatianWang double computeCallScore(uint64_t CallCount, size_t CallLength) { 3994483cf2dSShatianWang // Increase call lengths by 1 to avoid raising 0 to a negative power. 4004483cf2dSShatianWang return opts::CallScale * static_cast<double>(CallCount) / 4014483cf2dSShatianWang std::pow(static_cast<double>(CallLength + 1), opts::CallPower); 4024483cf2dSShatianWang } 4034483cf2dSShatianWang 4044483cf2dSShatianWang /// Compute the edge score of a jump (branch) edge. 4054483cf2dSShatianWang double computeJumpScore(uint64_t JumpCount, size_t JumpLength) { 4064483cf2dSShatianWang // Increase jump lengths by 1 to avoid raising 0 to a negative power. 4074483cf2dSShatianWang return static_cast<double>(JumpCount) / 4084483cf2dSShatianWang std::pow(static_cast<double>(JumpLength + 1), opts::JumpPower); 4094483cf2dSShatianWang } 4104483cf2dSShatianWang 4114483cf2dSShatianWang /// Compute sum of scores over jumps within \p BlockOrder given \p SplitIndex. 4124483cf2dSShatianWang /// Increament Score.LocalScore in place by the sum. 4134483cf2dSShatianWang void computeJumpScore(const BasicBlockOrder &BlockOrder, 4144483cf2dSShatianWang const size_t SplitIndex, SplitScore &Score) { 4154483cf2dSShatianWang 4164483cf2dSShatianWang for (const BinaryBasicBlock *SrcBB : BlockOrder) { 4174483cf2dSShatianWang if (SrcBB->getKnownExecutionCount() == 0) 4184483cf2dSShatianWang continue; 4194483cf2dSShatianWang 4204483cf2dSShatianWang const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange().second; 4214483cf2dSShatianWang 4224483cf2dSShatianWang for (const auto Pair : zip(SrcBB->successors(), SrcBB->branch_info())) { 4234483cf2dSShatianWang const BinaryBasicBlock *DstBB = std::get<0>(Pair); 4244483cf2dSShatianWang const BinaryBasicBlock::BinaryBranchInfo &Branch = std::get<1>(Pair); 4254483cf2dSShatianWang const size_t JumpCount = Branch.Count; 4264483cf2dSShatianWang 4274483cf2dSShatianWang if (JumpCount == 0) 4284483cf2dSShatianWang continue; 4294483cf2dSShatianWang 4304483cf2dSShatianWang const size_t DstBBStartAddr = DstBB->getOutputAddressRange().first; 4314483cf2dSShatianWang const size_t NewJumpLength = 4324483cf2dSShatianWang AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr); 4334483cf2dSShatianWang Score.LocalScore += computeJumpScore(JumpCount, NewJumpLength); 4344483cf2dSShatianWang } 4354483cf2dSShatianWang } 4364483cf2dSShatianWang } 4374483cf2dSShatianWang 4384483cf2dSShatianWang /// Compute sum of scores over calls originated in the current function 4394483cf2dSShatianWang /// given \p SplitIndex. Increament Score.LocalScore in place by the sum. 4404483cf2dSShatianWang void computeLocalCallScore(const BasicBlockOrder &BlockOrder, 4414483cf2dSShatianWang const size_t SplitIndex, SplitScore &Score) { 4424483cf2dSShatianWang if (opts::CallScale == 0) 4434483cf2dSShatianWang return; 4444483cf2dSShatianWang 4454483cf2dSShatianWang // Global index of the last block in the current function. 4464483cf2dSShatianWang // This is later used to determine whether a call originated in the current 4474483cf2dSShatianWang // function is to a function that comes after the current function. 4484483cf2dSShatianWang const size_t LastGlobalIndex = GlobalIndices[BlockOrder.back()]; 4494483cf2dSShatianWang 4504483cf2dSShatianWang // The length of calls originated in the input function can increase / 4514483cf2dSShatianWang // decrease depending on the splitting decision. 4524483cf2dSShatianWang for (const BinaryBasicBlock *SrcBB : BlockOrder) { 4534483cf2dSShatianWang const size_t CallCount = SrcBB->getKnownExecutionCount(); 4544483cf2dSShatianWang // If SrcBB does not call any functions, skip it. 4554483cf2dSShatianWang if (CallCount == 0) 4564483cf2dSShatianWang continue; 4574483cf2dSShatianWang 4584483cf2dSShatianWang // Obtain an estimate on the end address of the src basic block 4594483cf2dSShatianWang // after splitting at SplitIndex. 4604483cf2dSShatianWang const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange().second; 4614483cf2dSShatianWang 4624483cf2dSShatianWang for (const BinaryBasicBlock *DstBB : Callees[GlobalIndices[SrcBB]]) { 4634483cf2dSShatianWang // Obtain an estimate on the start address of the dst basic block 4644483cf2dSShatianWang // after splitting at SplitIndex. If DstBB is in a function before 4654483cf2dSShatianWang // the current function, then its start address remains unchanged. 4664483cf2dSShatianWang size_t DstBBStartAddr = BBOffsets[DstBB]; 4674483cf2dSShatianWang // If DstBB is in a function after the current function, then its 4684483cf2dSShatianWang // start address should be adjusted based on the reduction in hot size. 4694483cf2dSShatianWang if (GlobalIndices[DstBB] > LastGlobalIndex) { 4704483cf2dSShatianWang assert(DstBBStartAddr >= Score.HotSizeReduction); 4714483cf2dSShatianWang DstBBStartAddr -= Score.HotSizeReduction; 4724483cf2dSShatianWang } 4734483cf2dSShatianWang const size_t NewCallLength = 4744483cf2dSShatianWang AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr); 4754483cf2dSShatianWang Score.LocalScore += computeCallScore(CallCount, NewCallLength); 4764483cf2dSShatianWang } 4774483cf2dSShatianWang } 4784483cf2dSShatianWang } 4794483cf2dSShatianWang 4804483cf2dSShatianWang /// Compute sum of splitting scores for cover calls of the input function. 4814483cf2dSShatianWang /// Increament Score.CoverCallScore in place by the sum. 4824483cf2dSShatianWang void computeCoverCallScore(const BasicBlockOrder &BlockOrder, 4834483cf2dSShatianWang const size_t SplitIndex, 4844483cf2dSShatianWang const std::vector<CallInfo> &CoverCalls, 4854483cf2dSShatianWang SplitScore &Score) { 4864483cf2dSShatianWang if (opts::CallScale == 0) 4874483cf2dSShatianWang return; 4884483cf2dSShatianWang 4894483cf2dSShatianWang for (const CallInfo CI : CoverCalls) { 4904483cf2dSShatianWang assert(CI.Length >= Score.HotSizeReduction && 4914483cf2dSShatianWang "Length of cover calls must exceed reduced size of hot fragment."); 4924483cf2dSShatianWang // Compute the new length of the call, which is shorter than the original 4934483cf2dSShatianWang // one by the size of the splitted fragment minus the total size increase. 4944483cf2dSShatianWang const size_t NewCallLength = CI.Length - Score.HotSizeReduction; 4954483cf2dSShatianWang Score.CoverCallScore += computeCallScore(CI.Count, NewCallLength); 4964483cf2dSShatianWang } 4974483cf2dSShatianWang } 4984483cf2dSShatianWang 4994483cf2dSShatianWang /// Compute the split score of splitting a function at a given index. 50015774834SShatianWang /// The split score consists of local score and cover score. This function 50115774834SShatianWang /// returns \p Score of SplitScore type. It contains the local score and 50215774834SShatianWang /// cover score of the current splitting index. For easier book keeping and 50315774834SShatianWang /// comparison, it also stores the split index and the resulting reduction 50415774834SShatianWang /// in hot fragment size. 5054483cf2dSShatianWang SplitScore computeSplitScore(const BinaryFunction &BF, 5064483cf2dSShatianWang const BasicBlockOrder &BlockOrder, 5074483cf2dSShatianWang const size_t SplitIndex, 50815774834SShatianWang const std::vector<CallInfo> &CoverCalls) { 5094483cf2dSShatianWang // Populate BinaryBasicBlock::OutputAddressRange with estimated 5104483cf2dSShatianWang // new start and end addresses after hot-warm splitting at SplitIndex. 5114483cf2dSShatianWang size_t OldHotEnd; 5124483cf2dSShatianWang size_t NewHotEnd; 5134483cf2dSShatianWang std::tie(OldHotEnd, NewHotEnd) = 5144483cf2dSShatianWang estimatePostSplitBBAddress(BlockOrder, SplitIndex); 5154483cf2dSShatianWang 5164483cf2dSShatianWang SplitScore Score; 5174483cf2dSShatianWang Score.SplitIndex = SplitIndex; 5184483cf2dSShatianWang 5194483cf2dSShatianWang // It's not worth splitting if OldHotEnd < NewHotEnd. 5204483cf2dSShatianWang if (OldHotEnd < NewHotEnd) 5214483cf2dSShatianWang return Score; 5224483cf2dSShatianWang 5234483cf2dSShatianWang // Hot fragment size reduction due to splitting. 5244483cf2dSShatianWang Score.HotSizeReduction = OldHotEnd - NewHotEnd; 5254483cf2dSShatianWang 5264483cf2dSShatianWang // First part of LocalScore is the sum over call edges originated in the 5274483cf2dSShatianWang // input function. These edges can get shorter or longer depending on 5284483cf2dSShatianWang // SplitIndex. Score.LocalScore is increamented in place. 5294483cf2dSShatianWang computeLocalCallScore(BlockOrder, SplitIndex, Score); 5304483cf2dSShatianWang 5314483cf2dSShatianWang // Second part of LocalScore is the sum over jump edges with src basic block 5324483cf2dSShatianWang // and dst basic block in the current function. Score.LocalScore is 5334483cf2dSShatianWang // increamented in place. 5344483cf2dSShatianWang computeJumpScore(BlockOrder, SplitIndex, Score); 5354483cf2dSShatianWang 5364483cf2dSShatianWang // Compute CoverCallScore and store in Score in place. 5374483cf2dSShatianWang computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score); 5384483cf2dSShatianWang return Score; 5394483cf2dSShatianWang } 5404483cf2dSShatianWang 54115774834SShatianWang /// Find the most likely successor of a basic block when it has one or two 54215774834SShatianWang /// successors. Return nullptr otherwise. 54315774834SShatianWang const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) { 54415774834SShatianWang if (BB->succ_size() == 1) 54515774834SShatianWang return BB->getSuccessor(); 54615774834SShatianWang if (BB->succ_size() == 2) { 54715774834SShatianWang uint64_t TakenCount = BB->getTakenBranchInfo().Count; 54815774834SShatianWang assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE); 54915774834SShatianWang uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count; 55015774834SShatianWang assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE); 55115774834SShatianWang if (TakenCount > NonTakenCount) 55215774834SShatianWang return BB->getConditionalSuccessor(true); 55315774834SShatianWang else if (TakenCount < NonTakenCount) 55415774834SShatianWang return BB->getConditionalSuccessor(false); 55515774834SShatianWang } 55615774834SShatianWang return nullptr; 55715774834SShatianWang } 55815774834SShatianWang 559076bd22fSShatianWang /// Find the best index for splitting. The returned value is the index of the 560076bd22fSShatianWang /// last hot basic block. Hence, "no splitting" is equivalent to returning the 561076bd22fSShatianWang /// value which is one less than the size of the function. 562076bd22fSShatianWang size_t findSplitIndex(const BinaryFunction &BF, 563076bd22fSShatianWang const BasicBlockOrder &BlockOrder) { 56415774834SShatianWang assert(BlockOrder.size() > 2); 5654483cf2dSShatianWang // Find all function calls that can be shortened if we move blocks of the 5664483cf2dSShatianWang // current function to warm/cold 5674483cf2dSShatianWang const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF); 5684483cf2dSShatianWang 56915774834SShatianWang // Find the existing hot-cold splitting index. 57015774834SShatianWang size_t HotColdIndex = 0; 57115774834SShatianWang while (HotColdIndex + 1 < BlockOrder.size()) { 57215774834SShatianWang if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold()) 5734483cf2dSShatianWang break; 57415774834SShatianWang HotColdIndex++; 57515774834SShatianWang } 57615774834SShatianWang assert(HotColdIndex + 1 == BlockOrder.size() || 57715774834SShatianWang (BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() && 57815774834SShatianWang BlockOrder[HotColdIndex + 1]->getFragmentNum() == 57915774834SShatianWang FragmentNum::cold())); 58015774834SShatianWang 58115774834SShatianWang // Try all possible split indices up to HotColdIndex (blocks that have 58215774834SShatianWang // Index <= SplitIndex are in hot) and find the one maximizing the 58315774834SShatianWang // splitting score. 58415774834SShatianWang SplitScore BestScore; 58515774834SShatianWang for (size_t Index = 0; Index <= HotColdIndex; Index++) { 58615774834SShatianWang const BinaryBasicBlock *LastHotBB = BlockOrder[Index]; 58715774834SShatianWang assert(LastHotBB->getFragmentNum() != FragmentNum::cold()); 58815774834SShatianWang 58915774834SShatianWang // Do not break jump to the most likely successor. 59015774834SShatianWang if (Index + 1 < BlockOrder.size() && 59115774834SShatianWang BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB)) 59215774834SShatianWang continue; 59315774834SShatianWang 5944483cf2dSShatianWang const SplitScore Score = 59515774834SShatianWang computeSplitScore(BF, BlockOrder, Index, CoverCalls); 59615774834SShatianWang if (Score.sum() > BestScore.sum()) 5974483cf2dSShatianWang BestScore = Score; 5984483cf2dSShatianWang } 59915774834SShatianWang 60015774834SShatianWang // If we don't find a good splitting point, fallback to the original one. 60115774834SShatianWang if (BestScore.SplitIndex == size_t(-1)) 60215774834SShatianWang return HotColdIndex; 6034483cf2dSShatianWang 6044483cf2dSShatianWang return BestScore.SplitIndex; 605076bd22fSShatianWang } 606076bd22fSShatianWang }; 607076bd22fSShatianWang 6084fdbe985SFabian Parzefall struct SplitRandom2 final : public SplitStrategy { 6094fdbe985SFabian Parzefall std::minstd_rand0 Gen; 610e341e9f0SFabian Parzefall 6114fdbe985SFabian Parzefall SplitRandom2() : Gen(opts::RandomSeed.getValue()) {} 612e341e9f0SFabian Parzefall 6134fdbe985SFabian Parzefall bool canSplit(const BinaryFunction &BF) override { return true; } 614e341e9f0SFabian Parzefall 615076bd22fSShatianWang bool compactFragments() override { return true; } 616ae2b4da1SFabian Parzefall 6174fdbe985SFabian Parzefall void fragment(const BlockIt Start, const BlockIt End) override { 6184fdbe985SFabian Parzefall using DiffT = typename std::iterator_traits<BlockIt>::difference_type; 619ae2b4da1SFabian Parzefall const DiffT NumBlocks = End - Start; 620ae2b4da1SFabian Parzefall assert(NumBlocks > 0 && "Cannot fragment empty function"); 621e341e9f0SFabian Parzefall 622ae2b4da1SFabian Parzefall // We want to split at least one block 623ae2b4da1SFabian Parzefall const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1); 624ae2b4da1SFabian Parzefall std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint); 625ae2b4da1SFabian Parzefall const DiffT SplitPoint = Dist(Gen); 626ae2b4da1SFabian Parzefall for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End)) 627f428db7aSFabian Parzefall BB->setFragmentNum(FragmentNum::cold()); 628e341e9f0SFabian Parzefall 629e341e9f0SFabian Parzefall LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of " 630e341e9f0SFabian Parzefall "{1} possible) blocks to split\n", 631ae2b4da1SFabian Parzefall NumBlocks - SplitPoint, End - Start)); 632f428db7aSFabian Parzefall } 633f428db7aSFabian Parzefall }; 634e341e9f0SFabian Parzefall 6354fdbe985SFabian Parzefall struct SplitRandomN final : public SplitStrategy { 6364fdbe985SFabian Parzefall std::minstd_rand0 Gen; 63748ff38ceSFabian Parzefall 6384fdbe985SFabian Parzefall SplitRandomN() : Gen(opts::RandomSeed.getValue()) {} 63948ff38ceSFabian Parzefall 6404fdbe985SFabian Parzefall bool canSplit(const BinaryFunction &BF) override { return true; } 64148ff38ceSFabian Parzefall 642076bd22fSShatianWang bool compactFragments() override { return true; } 643ae2b4da1SFabian Parzefall 6444fdbe985SFabian Parzefall void fragment(const BlockIt Start, const BlockIt End) override { 6454fdbe985SFabian Parzefall using DiffT = typename std::iterator_traits<BlockIt>::difference_type; 646ae2b4da1SFabian Parzefall const DiffT NumBlocks = End - Start; 647ae2b4da1SFabian Parzefall assert(NumBlocks > 0 && "Cannot fragment empty function"); 64848ff38ceSFabian Parzefall 649ae2b4da1SFabian Parzefall // With n blocks, there are n-1 places to split them. 650ae2b4da1SFabian Parzefall const DiffT MaximumSplits = NumBlocks - 1; 651ae2b4da1SFabian Parzefall // We want to generate at least two fragment if possible, but if there is 652ae2b4da1SFabian Parzefall // only one block, no splits are possible. 653ae2b4da1SFabian Parzefall const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1); 654ae2b4da1SFabian Parzefall std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits); 65548ff38ceSFabian Parzefall // Choose how many splits to perform 6564fdbe985SFabian Parzefall const DiffT NumSplits = Dist(Gen); 65748ff38ceSFabian Parzefall 65848ff38ceSFabian Parzefall // Draw split points from a lottery 659ae2b4da1SFabian Parzefall SmallVector<unsigned, 0> Lottery(MaximumSplits); 660ae2b4da1SFabian Parzefall // Start lottery at 1, because there is no meaningful splitpoint before the 661ae2b4da1SFabian Parzefall // first block. 662ae2b4da1SFabian Parzefall std::iota(Lottery.begin(), Lottery.end(), 1u); 6634fdbe985SFabian Parzefall std::shuffle(Lottery.begin(), Lottery.end(), Gen); 66448ff38ceSFabian Parzefall Lottery.resize(NumSplits); 66548ff38ceSFabian Parzefall llvm::sort(Lottery); 66648ff38ceSFabian Parzefall 66748ff38ceSFabian Parzefall // Add one past the end entry to lottery 668ae2b4da1SFabian Parzefall Lottery.push_back(NumBlocks); 66948ff38ceSFabian Parzefall 67048ff38ceSFabian Parzefall unsigned LotteryIndex = 0; 67148ff38ceSFabian Parzefall unsigned BBPos = 0; 67248ff38ceSFabian Parzefall for (BinaryBasicBlock *const BB : make_range(Start, End)) { 67348ff38ceSFabian Parzefall // Check whether to start new fragment 67448ff38ceSFabian Parzefall if (BBPos >= Lottery[LotteryIndex]) 67548ff38ceSFabian Parzefall ++LotteryIndex; 67648ff38ceSFabian Parzefall 67748ff38ceSFabian Parzefall // Because LotteryIndex is 0 based and cold fragments are 1 based, we can 67848ff38ceSFabian Parzefall // use the index to assign fragments. 67948ff38ceSFabian Parzefall BB->setFragmentNum(FragmentNum(LotteryIndex)); 68048ff38ceSFabian Parzefall 68148ff38ceSFabian Parzefall ++BBPos; 68248ff38ceSFabian Parzefall } 68348ff38ceSFabian Parzefall } 68448ff38ceSFabian Parzefall }; 68548ff38ceSFabian Parzefall 6864fdbe985SFabian Parzefall struct SplitAll final : public SplitStrategy { 6874fdbe985SFabian Parzefall bool canSplit(const BinaryFunction &BF) override { return true; } 688f428db7aSFabian Parzefall 689076bd22fSShatianWang bool compactFragments() override { 6903ac46f37SFabian Parzefall // Keeping empty fragments allows us to test, that empty fragments do not 6913ac46f37SFabian Parzefall // generate symbols. 692076bd22fSShatianWang return false; 6933ac46f37SFabian Parzefall } 694ae2b4da1SFabian Parzefall 6954fdbe985SFabian Parzefall void fragment(const BlockIt Start, const BlockIt End) override { 696ae2b4da1SFabian Parzefall unsigned Fragment = 0; 697ae2b4da1SFabian Parzefall for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) 698f428db7aSFabian Parzefall BB->setFragmentNum(FragmentNum(Fragment++)); 699a0c7ca8aSKazu Hirata } 700e341e9f0SFabian Parzefall }; 701e341e9f0SFabian Parzefall } // namespace 702e341e9f0SFabian Parzefall 703a34c753fSRafael Auler namespace llvm { 704a34c753fSRafael Auler namespace bolt { 705a34c753fSRafael Auler 706a34c753fSRafael Auler bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const { 707a34c753fSRafael Auler // Apply execution count threshold 708a34c753fSRafael Auler if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) 709a34c753fSRafael Auler return false; 710a34c753fSRafael Auler 711a34c753fSRafael Auler return BinaryFunctionPass::shouldOptimize(BF); 712a34c753fSRafael Auler } 713a34c753fSRafael Auler 714a5f3d1a8SAmir Ayupov Error SplitFunctions::runOnFunctions(BinaryContext &BC) { 71596f6ec50SFabian Parzefall if (!opts::SplitFunctions) 716a5f3d1a8SAmir Ayupov return Error::success(); 717a34c753fSRafael Auler 718dd09a7dbSMaksim Panchenko if (BC.IsLinuxKernel && BC.BOLTReserved.empty()) { 719dd09a7dbSMaksim Panchenko BC.errs() << "BOLT-ERROR: split functions require reserved space in the " 720dd09a7dbSMaksim Panchenko "Linux kernel binary\n"; 721dd09a7dbSMaksim Panchenko exit(1); 722dd09a7dbSMaksim Panchenko } 723dd09a7dbSMaksim Panchenko 724076bd22fSShatianWang // If split strategy is not CDSplit, then a second run of the pass is not 725076bd22fSShatianWang // needed after function reordering. 726076bd22fSShatianWang if (BC.HasFinalizedFunctionOrder && 727076bd22fSShatianWang opts::SplitStrategy != SplitFunctionsStrategy::CDSplit) 728a5f3d1a8SAmir Ayupov return Error::success(); 729076bd22fSShatianWang 7304fdbe985SFabian Parzefall std::unique_ptr<SplitStrategy> Strategy; 731f428db7aSFabian Parzefall bool ForceSequential = false; 732f428db7aSFabian Parzefall 733f428db7aSFabian Parzefall switch (opts::SplitStrategy) { 734076bd22fSShatianWang case SplitFunctionsStrategy::CDSplit: 735076bd22fSShatianWang // CDSplit runs two splitting passes: hot-cold splitting (SplitPrfoile2) 736076bd22fSShatianWang // before function reordering and hot-warm-cold splitting 737076bd22fSShatianWang // (SplitCacheDirected) after function reordering. 738076bd22fSShatianWang if (BC.HasFinalizedFunctionOrder) 73956bbf813SShatianWang Strategy = std::make_unique<SplitCacheDirected>(BC); 740076bd22fSShatianWang else 741076bd22fSShatianWang Strategy = std::make_unique<SplitProfile2>(); 742076bd22fSShatianWang opts::AggressiveSplitting = true; 743c43d0432SShatianWang BC.HasWarmSection = true; 744076bd22fSShatianWang break; 745f428db7aSFabian Parzefall case SplitFunctionsStrategy::Profile2: 7464fdbe985SFabian Parzefall Strategy = std::make_unique<SplitProfile2>(); 747f428db7aSFabian Parzefall break; 748f428db7aSFabian Parzefall case SplitFunctionsStrategy::Random2: 7494fdbe985SFabian Parzefall Strategy = std::make_unique<SplitRandom2>(); 750f428db7aSFabian Parzefall // If we split functions randomly, we need to ensure that across runs with 751f428db7aSFabian Parzefall // the same input, we generate random numbers for each function in the same 752f428db7aSFabian Parzefall // order. 753f428db7aSFabian Parzefall ForceSequential = true; 754f428db7aSFabian Parzefall break; 75548ff38ceSFabian Parzefall case SplitFunctionsStrategy::RandomN: 7564fdbe985SFabian Parzefall Strategy = std::make_unique<SplitRandomN>(); 75748ff38ceSFabian Parzefall ForceSequential = true; 75848ff38ceSFabian Parzefall break; 759f428db7aSFabian Parzefall case SplitFunctionsStrategy::All: 7604fdbe985SFabian Parzefall Strategy = std::make_unique<SplitAll>(); 761f428db7aSFabian Parzefall break; 762f428db7aSFabian Parzefall } 763a34c753fSRafael Auler 764a34c753fSRafael Auler ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 765a34c753fSRafael Auler return !shouldOptimize(BF); 766a34c753fSRafael Auler }; 767a34c753fSRafael Auler 768a34c753fSRafael Auler ParallelUtilities::runOnEachFunction( 7694fdbe985SFabian Parzefall BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, 7704fdbe985SFabian Parzefall [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc, 771e341e9f0SFabian Parzefall "SplitFunctions", ForceSequential); 772a34c753fSRafael Auler 773f92ab6afSAmir Ayupov if (SplitBytesHot + SplitBytesCold > 0) 77452cf0711SAmir Ayupov BC.outs() << "BOLT-INFO: splitting separates " << SplitBytesHot 775a34c753fSRafael Auler << " hot bytes from " << SplitBytesCold << " cold bytes " 776a34c753fSRafael Auler << format("(%.2lf%% of split functions is hot).\n", 77752cf0711SAmir Ayupov 100.0 * SplitBytesHot / 77852cf0711SAmir Ayupov (SplitBytesHot + SplitBytesCold)); 779a5f3d1a8SAmir Ayupov return Error::success(); 780a34c753fSRafael Auler } 781a34c753fSRafael Auler 7824fdbe985SFabian Parzefall void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { 783e341e9f0SFabian Parzefall if (BF.empty()) 784a34c753fSRafael Auler return; 785a34c753fSRafael Auler 786f428db7aSFabian Parzefall if (!S.canSplit(BF)) 787a34c753fSRafael Auler return; 788a34c753fSRafael Auler 7898477bc67SFabian Parzefall FunctionLayout &Layout = BF.getLayout(); 7908477bc67SFabian Parzefall BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(), 7918477bc67SFabian Parzefall Layout.block_end()); 792a34c753fSRafael Auler 793a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 794a34c753fSRafael Auler size_t OriginalHotSize; 795a34c753fSRafael Auler size_t HotSize; 796a34c753fSRafael Auler size_t ColdSize; 797a34c753fSRafael Auler if (BC.isX86()) { 798a34c753fSRafael Auler std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF); 799a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 800a34c753fSRafael Auler << " pre-split is <0x" 801a34c753fSRafael Auler << Twine::utohexstr(OriginalHotSize) << ", 0x" 802a34c753fSRafael Auler << Twine::utohexstr(ColdSize) << ">\n"); 803f263a66bSMaksim Panchenko } 804a34c753fSRafael Auler 8058477bc67SFabian Parzefall BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(), 8068477bc67SFabian Parzefall Layout.block_end()); 807a34c753fSRafael Auler // Never outline the first basic block. 8088477bc67SFabian Parzefall NewLayout.front()->setCanOutline(false); 8098477bc67SFabian Parzefall for (BinaryBasicBlock *const BB : NewLayout) { 810a34c753fSRafael Auler if (!BB->canOutline()) 811a34c753fSRafael Auler continue; 812ae2b4da1SFabian Parzefall 813a34c753fSRafael Auler // Do not split extra entry points in aarch64. They can be referred by 814a34c753fSRafael Auler // using ADRs and when this happens, these blocks cannot be placed far 815a34c753fSRafael Auler // away due to the limited range in ADR instruction. 816a34c753fSRafael Auler if (BC.isAArch64() && BB->isEntryPoint()) { 817a34c753fSRafael Auler BB->setCanOutline(false); 818a34c753fSRafael Auler continue; 819a34c753fSRafael Auler } 820f263a66bSMaksim Panchenko 821a34c753fSRafael Auler if (BF.hasEHRanges() && !opts::SplitEH) { 822f263a66bSMaksim Panchenko // We cannot move landing pads (or rather entry points for landing pads). 823a34c753fSRafael Auler if (BB->isLandingPad()) { 824a34c753fSRafael Auler BB->setCanOutline(false); 825a34c753fSRafael Auler continue; 826a34c753fSRafael Auler } 827a34c753fSRafael Auler // We cannot move a block that can throw since exception-handling 828a34c753fSRafael Auler // runtime cannot deal with split functions. However, if we can guarantee 829a34c753fSRafael Auler // that the block never throws, it is safe to move the block to 830a34c753fSRafael Auler // decrease the size of the function. 831a34c753fSRafael Auler for (MCInst &Instr : *BB) { 832f263a66bSMaksim Panchenko if (BC.MIB->isInvoke(Instr)) { 833a34c753fSRafael Auler BB->setCanOutline(false); 834a34c753fSRafael Auler break; 835a34c753fSRafael Auler } 836a34c753fSRafael Auler } 837a34c753fSRafael Auler } 838dd09a7dbSMaksim Panchenko 839dd09a7dbSMaksim Panchenko // Outlining blocks with dynamic branches is not supported yet. 840dd09a7dbSMaksim Panchenko if (BC.IsLinuxKernel) { 841dd09a7dbSMaksim Panchenko if (llvm::any_of( 842dd09a7dbSMaksim Panchenko *BB, [&](MCInst &Inst) { return BC.MIB->isDynamicBranch(Inst); })) 843dd09a7dbSMaksim Panchenko BB->setCanOutline(false); 844dd09a7dbSMaksim Panchenko } 845a34c753fSRafael Auler } 846a34c753fSRafael Auler 847ae2b4da1SFabian Parzefall BF.getLayout().updateLayoutIndices(); 848ae2b4da1SFabian Parzefall S.fragment(NewLayout.begin(), NewLayout.end()); 849ae2b4da1SFabian Parzefall 850ae2b4da1SFabian Parzefall // Make sure all non-outlineable blocks are in the main-fragment. 851ae2b4da1SFabian Parzefall for (BinaryBasicBlock *const BB : NewLayout) { 852ae2b4da1SFabian Parzefall if (!BB->canOutline()) 853ae2b4da1SFabian Parzefall BB->setFragmentNum(FragmentNum::main()); 854ae2b4da1SFabian Parzefall } 855ae2b4da1SFabian Parzefall 856a34c753fSRafael Auler if (opts::AggressiveSplitting) { 857a34c753fSRafael Auler // All blocks with 0 count that we can move go to the end of the function. 858a34c753fSRafael Auler // Even if they were natural to cluster formation and were seen in-between 859a34c753fSRafael Auler // hot basic blocks. 860ae2b4da1SFabian Parzefall llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A, 861ae2b4da1SFabian Parzefall const BinaryBasicBlock *const B) { 862ae2b4da1SFabian Parzefall return A->getFragmentNum() < B->getFragmentNum(); 863a34c753fSRafael Auler }); 864a34c753fSRafael Auler } else if (BF.hasEHRanges() && !opts::SplitEH) { 865a34c753fSRafael Auler // Typically functions with exception handling have landing pads at the end. 866a34c753fSRafael Auler // We cannot move beginning of landing pads, but we can move 0-count blocks 867a34c753fSRafael Auler // comprising landing pads to the end and thus facilitate splitting. 8688477bc67SFabian Parzefall auto FirstLP = NewLayout.begin(); 869a34c753fSRafael Auler while ((*FirstLP)->isLandingPad()) 870a34c753fSRafael Auler ++FirstLP; 871a34c753fSRafael Auler 8728477bc67SFabian Parzefall std::stable_sort(FirstLP, NewLayout.end(), 873a34c753fSRafael Auler [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 874ae2b4da1SFabian Parzefall return A->getFragmentNum() < B->getFragmentNum(); 875a34c753fSRafael Auler }); 876a34c753fSRafael Auler } 877a34c753fSRafael Auler 878ae2b4da1SFabian Parzefall // Make sure that fragments are increasing. 879ae2b4da1SFabian Parzefall FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum(); 880ae2b4da1SFabian Parzefall for (BinaryBasicBlock *const BB : reverse(NewLayout)) { 881ae2b4da1SFabian Parzefall if (BB->getFragmentNum() > CurrentFragment) 882ae2b4da1SFabian Parzefall BB->setFragmentNum(CurrentFragment); 883ae2b4da1SFabian Parzefall CurrentFragment = BB->getFragmentNum(); 884ae2b4da1SFabian Parzefall } 885f428db7aSFabian Parzefall 886076bd22fSShatianWang if (S.compactFragments()) { 887ae2b4da1SFabian Parzefall FragmentNum CurrentFragment = FragmentNum::main(); 888ae2b4da1SFabian Parzefall FragmentNum NewFragment = FragmentNum::main(); 889ae2b4da1SFabian Parzefall for (BinaryBasicBlock *const BB : NewLayout) { 890ae2b4da1SFabian Parzefall if (BB->getFragmentNum() > CurrentFragment) { 891ae2b4da1SFabian Parzefall CurrentFragment = BB->getFragmentNum(); 892ae2b4da1SFabian Parzefall NewFragment = FragmentNum(NewFragment.get() + 1); 893ae2b4da1SFabian Parzefall } 894ae2b4da1SFabian Parzefall BB->setFragmentNum(NewFragment); 895ae2b4da1SFabian Parzefall } 896ae2b4da1SFabian Parzefall } 897ae2b4da1SFabian Parzefall 898076bd22fSShatianWang const bool LayoutUpdated = BF.getLayout().update(NewLayout); 899a34c753fSRafael Auler 900ed743045SMaksim Panchenko // For shared objects, invoke instructions and corresponding landing pads 901ed743045SMaksim Panchenko // have to be placed in the same fragment. When we split them, create 902ed743045SMaksim Panchenko // trampoline landing pads that will redirect the execution to real LPs. 903ed743045SMaksim Panchenko TrampolineSetType Trampolines; 904*92301180SMaksim Panchenko if (BF.hasEHRanges() && BF.isSplit()) { 905105ecd8bSMaksim Panchenko // If all landing pads for this fragment are grouped in one (potentially 906105ecd8bSMaksim Panchenko // different) fragment, we can set LPStart to the start of that fragment 907105ecd8bSMaksim Panchenko // and avoid trampoline code. 908105ecd8bSMaksim Panchenko bool NeedsTrampolines = false; 909105ecd8bSMaksim Panchenko for (FunctionFragment &FF : BF.getLayout().fragments()) { 910105ecd8bSMaksim Panchenko // Vector of fragments that contain landing pads for this fragment. 911105ecd8bSMaksim Panchenko SmallVector<FragmentNum, 4> LandingPadFragments; 912105ecd8bSMaksim Panchenko for (const BinaryBasicBlock *BB : FF) 913105ecd8bSMaksim Panchenko for (const BinaryBasicBlock *LPB : BB->landing_pads()) 914105ecd8bSMaksim Panchenko LandingPadFragments.push_back(LPB->getFragmentNum()); 915105ecd8bSMaksim Panchenko 916105ecd8bSMaksim Panchenko // Eliminate duplicate entries from the vector. 917105ecd8bSMaksim Panchenko llvm::sort(LandingPadFragments); 918105ecd8bSMaksim Panchenko auto Last = llvm::unique(LandingPadFragments); 919105ecd8bSMaksim Panchenko LandingPadFragments.erase(Last, LandingPadFragments.end()); 920105ecd8bSMaksim Panchenko 921105ecd8bSMaksim Panchenko if (LandingPadFragments.size() == 0) { 922105ecd8bSMaksim Panchenko // If the fragment has no landing pads, we can safely set itself as its 923105ecd8bSMaksim Panchenko // landing pad fragment. 924105ecd8bSMaksim Panchenko BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum()); 925105ecd8bSMaksim Panchenko } else if (LandingPadFragments.size() == 1) { 926105ecd8bSMaksim Panchenko BF.setLPFragment(FF.getFragmentNum(), LandingPadFragments.front()); 927105ecd8bSMaksim Panchenko } else { 928*92301180SMaksim Panchenko if (!BC.HasFixedLoadAddress) { 929105ecd8bSMaksim Panchenko NeedsTrampolines = true; 930105ecd8bSMaksim Panchenko break; 931*92301180SMaksim Panchenko } else { 932*92301180SMaksim Panchenko BF.setLPFragment(FF.getFragmentNum(), std::nullopt); 933*92301180SMaksim Panchenko } 934105ecd8bSMaksim Panchenko } 935105ecd8bSMaksim Panchenko } 936105ecd8bSMaksim Panchenko 937105ecd8bSMaksim Panchenko // Trampolines guarantee that all landing pads for any given fragment will 938105ecd8bSMaksim Panchenko // be contained in the same fragment. 939105ecd8bSMaksim Panchenko if (NeedsTrampolines) { 940105ecd8bSMaksim Panchenko for (FunctionFragment &FF : BF.getLayout().fragments()) 941105ecd8bSMaksim Panchenko BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum()); 942ed743045SMaksim Panchenko Trampolines = createEHTrampolines(BF); 943105ecd8bSMaksim Panchenko } 944105ecd8bSMaksim Panchenko } 945f263a66bSMaksim Panchenko 946a34c753fSRafael Auler // Check the new size to see if it's worth splitting the function. 947076bd22fSShatianWang if (BC.isX86() && LayoutUpdated) { 948a34c753fSRafael Auler std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); 949a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 950a34c753fSRafael Auler << " post-split is <0x" << Twine::utohexstr(HotSize) 951a34c753fSRafael Auler << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); 952a34c753fSRafael Auler if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <= 953a34c753fSRafael Auler alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) { 954c30ab9dcSAmir Ayupov if (opts::Verbosity >= 2) { 95552cf0711SAmir Ayupov BC.outs() << "BOLT-INFO: Reversing splitting of function " 956c30ab9dcSAmir Ayupov << formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize, 957c30ab9dcSAmir Ayupov ColdSize, OriginalHotSize); 958c30ab9dcSAmir Ayupov } 959a34c753fSRafael Auler 960ed743045SMaksim Panchenko // Reverse the action of createEHTrampolines(). The trampolines will be 961ed743045SMaksim Panchenko // placed immediately before the matching destination resulting in no 962ed743045SMaksim Panchenko // extra code. 963ed743045SMaksim Panchenko if (PreSplitLayout.size() != BF.size()) 964ed743045SMaksim Panchenko PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines); 965ed743045SMaksim Panchenko 966f92ab6afSAmir Ayupov for (BinaryBasicBlock &BB : BF) 9670f74d191SFabian Parzefall BB.setFragmentNum(FragmentNum::main()); 9688477bc67SFabian Parzefall BF.getLayout().update(PreSplitLayout); 969a34c753fSRafael Auler } else { 970a34c753fSRafael Auler SplitBytesHot += HotSize; 971a34c753fSRafael Auler SplitBytesCold += ColdSize; 972a34c753fSRafael Auler } 973a34c753fSRafael Auler } 974076bd22fSShatianWang 975105ecd8bSMaksim Panchenko // Restore LP fragment for the main fragment if the splitting was undone. 976105ecd8bSMaksim Panchenko if (BF.hasEHRanges() && !BF.isSplit()) 977105ecd8bSMaksim Panchenko BF.setLPFragment(FragmentNum::main(), FragmentNum::main()); 978105ecd8bSMaksim Panchenko 979076bd22fSShatianWang // Fix branches if the splitting decision of the pass after function 980076bd22fSShatianWang // reordering is different from that of the pass before function reordering. 981076bd22fSShatianWang if (LayoutUpdated && BC.HasFinalizedFunctionOrder) 982076bd22fSShatianWang BF.fixBranches(); 983a34c753fSRafael Auler } 984a34c753fSRafael Auler 985ed743045SMaksim Panchenko SplitFunctions::TrampolineSetType 986ed743045SMaksim Panchenko SplitFunctions::createEHTrampolines(BinaryFunction &BF) const { 987f263a66bSMaksim Panchenko const auto &MIB = BF.getBinaryContext().MIB; 988f263a66bSMaksim Panchenko 989f263a66bSMaksim Panchenko // Map real landing pads to the corresponding trampolines. 990ed743045SMaksim Panchenko TrampolineSetType LPTrampolines; 991f263a66bSMaksim Panchenko 992f263a66bSMaksim Panchenko // Iterate over the copy of basic blocks since we are adding new blocks to the 993f263a66bSMaksim Panchenko // function which will invalidate its iterators. 994f263a66bSMaksim Panchenko std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend()); 995f263a66bSMaksim Panchenko for (BinaryBasicBlock *BB : Blocks) { 996f263a66bSMaksim Panchenko for (MCInst &Instr : *BB) { 9972563fd63SAmir Ayupov const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr); 998f263a66bSMaksim Panchenko if (!EHInfo || !EHInfo->first) 999f263a66bSMaksim Panchenko continue; 1000f263a66bSMaksim Panchenko 1001f263a66bSMaksim Panchenko const MCSymbol *LPLabel = EHInfo->first; 1002f263a66bSMaksim Panchenko BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel); 1003e001a4e4SFabian Parzefall if (BB->getFragmentNum() == LPBlock->getFragmentNum()) 1004f263a66bSMaksim Panchenko continue; 1005f263a66bSMaksim Panchenko 1006f263a66bSMaksim Panchenko const MCSymbol *TrampolineLabel = nullptr; 1007e001a4e4SFabian Parzefall const TrampolineKey Key(BB->getFragmentNum(), LPLabel); 1008e001a4e4SFabian Parzefall auto Iter = LPTrampolines.find(Key); 1009f263a66bSMaksim Panchenko if (Iter != LPTrampolines.end()) { 1010f263a66bSMaksim Panchenko TrampolineLabel = Iter->second; 1011f263a66bSMaksim Panchenko } else { 1012f263a66bSMaksim Panchenko // Create a trampoline basic block in the same fragment as the thrower. 1013f263a66bSMaksim Panchenko // Note: there's no need to insert the jump instruction, it will be 1014f263a66bSMaksim Panchenko // added by fixBranches(). 1015f263a66bSMaksim Panchenko BinaryBasicBlock *TrampolineBB = BF.addBasicBlock(); 1016e001a4e4SFabian Parzefall TrampolineBB->setFragmentNum(BB->getFragmentNum()); 1017f263a66bSMaksim Panchenko TrampolineBB->setExecutionCount(LPBlock->getExecutionCount()); 1018f263a66bSMaksim Panchenko TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount()); 1019f263a66bSMaksim Panchenko TrampolineBB->setCFIState(LPBlock->getCFIState()); 1020f263a66bSMaksim Panchenko TrampolineLabel = TrampolineBB->getLabel(); 1021e001a4e4SFabian Parzefall LPTrampolines.insert(std::make_pair(Key, TrampolineLabel)); 1022f263a66bSMaksim Panchenko } 1023f263a66bSMaksim Panchenko 1024f263a66bSMaksim Panchenko // Substitute the landing pad with the trampoline. 1025f263a66bSMaksim Panchenko MIB->updateEHInfo(Instr, 1026f263a66bSMaksim Panchenko MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second)); 1027f263a66bSMaksim Panchenko } 1028f263a66bSMaksim Panchenko } 1029f263a66bSMaksim Panchenko 1030f263a66bSMaksim Panchenko if (LPTrampolines.empty()) 1031ed743045SMaksim Panchenko return LPTrampolines; 1032f263a66bSMaksim Panchenko 1033f263a66bSMaksim Panchenko // All trampoline blocks were added to the end of the function. Place them at 1034f263a66bSMaksim Panchenko // the end of corresponding fragments. 10358477bc67SFabian Parzefall BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(), 10368477bc67SFabian Parzefall BF.getLayout().block_end()); 10378477bc67SFabian Parzefall stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 1038e001a4e4SFabian Parzefall return A->getFragmentNum() < B->getFragmentNum(); 1039f263a66bSMaksim Panchenko }); 10408477bc67SFabian Parzefall BF.getLayout().update(NewLayout); 1041f263a66bSMaksim Panchenko 1042f263a66bSMaksim Panchenko // Conservatively introduce branch instructions. 1043f263a66bSMaksim Panchenko BF.fixBranches(); 1044f263a66bSMaksim Panchenko 1045f263a66bSMaksim Panchenko // Update exception-handling CFG for the function. 1046f263a66bSMaksim Panchenko BF.recomputeLandingPads(); 1047ed743045SMaksim Panchenko 1048ed743045SMaksim Panchenko return LPTrampolines; 1049ed743045SMaksim Panchenko } 1050ed743045SMaksim Panchenko 1051ed743045SMaksim Panchenko SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines( 1052ed743045SMaksim Panchenko BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout, 1053ed743045SMaksim Panchenko const SplitFunctions::TrampolineSetType &Trampolines) const { 1054e001a4e4SFabian Parzefall DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>> 1055e001a4e4SFabian Parzefall IncomingTrampolines; 1056e001a4e4SFabian Parzefall for (const auto &Entry : Trampolines) { 1057e001a4e4SFabian Parzefall IncomingTrampolines[Entry.getFirst().Target].emplace_back( 1058e001a4e4SFabian Parzefall Entry.getSecond()); 1059e001a4e4SFabian Parzefall } 1060e001a4e4SFabian Parzefall 1061ed743045SMaksim Panchenko BasicBlockOrderType MergedLayout; 1062ed743045SMaksim Panchenko for (BinaryBasicBlock *BB : Layout) { 1063e001a4e4SFabian Parzefall auto Iter = IncomingTrampolines.find(BB->getLabel()); 1064e001a4e4SFabian Parzefall if (Iter != IncomingTrampolines.end()) { 1065e001a4e4SFabian Parzefall for (const MCSymbol *const Trampoline : Iter->getSecond()) { 1066e001a4e4SFabian Parzefall BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline); 1067ed743045SMaksim Panchenko assert(LPBlock && "Could not find matching landing pad block."); 1068ed743045SMaksim Panchenko MergedLayout.push_back(LPBlock); 1069ed743045SMaksim Panchenko } 1070e001a4e4SFabian Parzefall } 1071ed743045SMaksim Panchenko MergedLayout.push_back(BB); 1072ed743045SMaksim Panchenko } 1073ed743045SMaksim Panchenko 1074ed743045SMaksim Panchenko return MergedLayout; 1075f263a66bSMaksim Panchenko } 1076f263a66bSMaksim Panchenko 1077a34c753fSRafael Auler } // namespace bolt 1078a34c753fSRafael Auler } // namespace llvm 1079