//===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements ReorderFunctions class. // //===----------------------------------------------------------------------===// #include "bolt/Passes/ReorderFunctions.h" #include "bolt/Passes/HFSort.h" #include "bolt/Utils/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/CodeLayout.h" #include #define DEBUG_TYPE "hfsort" using namespace llvm; namespace opts { extern cl::OptionCategory BoltOptCategory; extern cl::opt Verbosity; extern cl::opt RandomSeed; extern size_t padFunctionBefore(const bolt::BinaryFunction &Function); extern size_t padFunctionAfter(const bolt::BinaryFunction &Function); extern cl::opt ReorderFunctions; cl::opt ReorderFunctions( "reorder-functions", cl::desc("reorder and cluster functions (works only with relocations)"), cl::init(bolt::ReorderFunctions::RT_NONE), cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none", "do not reorder functions"), clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count", "order by execution count"), clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort", "use hfsort algorithm"), clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", "use cache-directed sort"), clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort", "use cache-directed sort"), clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, "pettis-hansen", "use Pettis-Hansen algorithm"), clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", "reorder functions randomly"), clEnumValN(bolt::ReorderFunctions::RT_USER, "user", "use function order specified by -function-order")), cl::ZeroOrMore, cl::cat(BoltOptCategory), cl::callback([](const bolt::ReorderFunctions::ReorderType &option) { if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) { errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated," << " please use '-reorder-functions=cdsort' instead\n"; ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT; } })); static cl::opt ReorderFunctionsUseHotSize( "reorder-functions-use-hot-size", cl::desc("use a function's hot size when doing clustering"), cl::init(true), cl::cat(BoltOptCategory)); static cl::opt FunctionOrderFile( "function-order", cl::desc("file containing an ordered list of functions to use for function " "reordering"), cl::cat(BoltOptCategory)); static cl::opt GenerateFunctionOrderFile( "generate-function-order", cl::desc("file to dump the ordered list of functions to use for function " "reordering"), cl::cat(BoltOptCategory)); static cl::opt LinkSectionsFile( "generate-link-sections", cl::desc("generate a list of function sections in a format suitable for " "inclusion in a linker script"), cl::cat(BoltOptCategory)); static cl::opt UseEdgeCounts("use-edge-counts", cl::desc("use edge count data when doing clustering"), cl::init(true), cl::cat(BoltOptCategory)); static cl::opt CgFromPerfData( "cg-from-perf-data", cl::desc("use perf data directly when constructing the call graph" " for stale functions"), cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt CgIgnoreRecursiveCalls( "cg-ignore-recursive-calls", cl::desc("ignore recursive calls when constructing the call graph"), cl::init(true), cl::cat(BoltOptCategory)); static cl::opt CgUseSplitHotSize( "cg-use-split-hot-size", cl::desc("use hot/cold data on basic blocks to determine hot sizes for " "call graph functions"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); } // namespace opts namespace llvm { namespace bolt { using NodeId = CallGraph::NodeId; using Arc = CallGraph::Arc; using Node = CallGraph::Node; void ReorderFunctions::reorder(BinaryContext &BC, std::vector &&Clusters, std::map &BFs) { std::vector FuncAddr(Cg.numNodes()); // Just for computing stats uint64_t TotalSize = 0; uint32_t Index = 0; // Set order of hot functions based on clusters. for (const Cluster &Cluster : Clusters) { for (const NodeId FuncId : Cluster.targets()) { Cg.nodeIdToFunc(FuncId)->setIndex(Index++); FuncAddr[FuncId] = TotalSize; TotalSize += Cg.size(FuncId); } } // Assign valid index for functions with valid profile. for (auto &It : BFs) { BinaryFunction &BF = It.second; if (!BF.hasValidIndex() && BF.hasValidProfile()) BF.setIndex(Index++); } if (opts::ReorderFunctions == RT_NONE) return; printStats(BC, Clusters, FuncAddr); } void ReorderFunctions::printStats(BinaryContext &BC, const std::vector &Clusters, const std::vector &FuncAddr) { if (opts::Verbosity == 0) { #ifndef NDEBUG if (!DebugFlag || !isCurrentDebugType("hfsort")) return; #else return; #endif } bool PrintDetailed = opts::Verbosity > 1; #ifndef NDEBUG PrintDetailed |= (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); #endif uint64_t TotalSize = 0; uint64_t CurPage = 0; uint64_t Hotfuncs = 0; double TotalDistance = 0; double TotalCalls = 0; double TotalCalls64B = 0; double TotalCalls4KB = 0; double TotalCalls2MB = 0; if (PrintDetailed) BC.outs() << "BOLT-INFO: Function reordering page layout\n" << "BOLT-INFO: ============== page 0 ==============\n"; for (const Cluster &Cluster : Clusters) { if (PrintDetailed) BC.outs() << format( "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", Cluster.density(), Cluster.samples(), Cluster.size()); for (NodeId FuncId : Cluster.targets()) { if (Cg.samples(FuncId) > 0) { Hotfuncs++; if (PrintDetailed) BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " (" << Cg.size(FuncId) << ")\n"; uint64_t Dist = 0; uint64_t Calls = 0; for (NodeId Dst : Cg.successors(FuncId)) { if (FuncId == Dst) // ignore recursive calls in stats continue; const Arc &Arc = *Cg.findArc(FuncId, Dst); const auto D = std::abs(FuncAddr[Arc.dst()] - (FuncAddr[FuncId] + Arc.avgCallOffset())); const double W = Arc.weight(); if (D < 64 && PrintDetailed && opts::Verbosity > 2) BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n" << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n" << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" << "BOLT-INFO: Weight = " << W << "\n" << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() << "\n"; Calls += W; if (D < 64) TotalCalls64B += W; if (D < 4096) TotalCalls4KB += W; if (D < (2 << 20)) TotalCalls2MB += W; Dist += Arc.weight() * D; if (PrintDetailed) BC.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " "weight = %.0lf, callDist = %f\n", Arc.src(), FuncAddr[Arc.src()], Arc.avgCallOffset(), Arc.dst(), FuncAddr[Arc.dst()], Arc.weight(), D); } TotalCalls += Calls; TotalDistance += Dist; TotalSize += Cg.size(FuncId); if (PrintDetailed) { BC.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", TotalSize, Calls ? Dist / Calls : 0) << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; const uint64_t NewPage = TotalSize / HugePageSize; if (NewPage != CurPage) { CurPage = NewPage; BC.outs() << format( "BOLT-INFO: ============== page %u ==============\n", CurPage); } } } } } BC.outs() << "BOLT-INFO: Function reordering stats\n" << format("BOLT-INFO: Number of hot functions: %u\n" "BOLT-INFO: Number of clusters: %lu\n", Hotfuncs, Clusters.size()) << format("BOLT-INFO: Final average call distance = %.1lf " "(%.0lf / %.0lf)\n", TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance, TotalCalls) << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); if (TotalCalls) BC.outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", TotalCalls64B, 100 * TotalCalls64B / TotalCalls) << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); } Error ReorderFunctions::readFunctionOrderFile( std::vector &FunctionNames) { std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); if (!FuncsFile) return createFatalBOLTError(Twine("Ordered functions file \"") + Twine(opts::FunctionOrderFile) + Twine("\" can't be opened.")); std::string FuncName; while (std::getline(FuncsFile, FuncName)) FunctionNames.push_back(FuncName); return Error::success(); } Error ReorderFunctions::runOnFunctions(BinaryContext &BC) { auto &BFs = BC.getBinaryFunctions(); if (opts::ReorderFunctions != RT_NONE && opts::ReorderFunctions != RT_EXEC_COUNT && opts::ReorderFunctions != RT_USER) { Cg = buildCallGraph( BC, [](const BinaryFunction &BF) { if (!BF.hasProfile()) return true; if (BF.getState() != BinaryFunction::State::CFG) return true; return false; }, opts::CgFromPerfData, /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize, opts::CgUseSplitHotSize, opts::UseEdgeCounts, opts::CgIgnoreRecursiveCalls); Cg.normalizeArcWeights(); } std::vector Clusters; switch (opts::ReorderFunctions) { case RT_NONE: break; case RT_EXEC_COUNT: { std::vector SortedFunctions(BFs.size()); llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), [](BinaryFunction &BF) { return &BF; }); llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A, const BinaryFunction *B) { if (A->isIgnored()) return false; if (B->isIgnored()) return true; const size_t PadA = opts::padFunctionBefore(*A) + opts::padFunctionAfter(*A); const size_t PadB = opts::padFunctionBefore(*B) + opts::padFunctionAfter(*B); if (!PadA || !PadB) { if (PadA) return true; if (PadB) return false; } if (!A->hasProfile()) return false; if (!B->hasProfile()) return true; return A->getExecutionCount() > B->getExecutionCount(); }); uint32_t Index = 0; for (BinaryFunction *BF : SortedFunctions) if (BF->hasProfile()) { BF->setIndex(Index++); LLVM_DEBUG(if (opts::Verbosity > 1) { dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " (" << BF->getExecutionCount() << ")\n"; }); } } break; case RT_HFSORT: Clusters = clusterize(Cg); break; case RT_CDSORT: { // It is required that the sum of incoming arc weights is not greater // than the number of samples for every function. Ensuring the call graph // obeys the property before running the algorithm. Cg.adjustArcWeights(); // Initialize CFG nodes and their data std::vector FuncSizes; std::vector FuncCounts; std::vector CallCounts; std::vector CallOffsets; for (NodeId F = 0; F < Cg.numNodes(); ++F) { FuncSizes.push_back(Cg.size(F)); FuncCounts.push_back(Cg.samples(F)); for (NodeId Succ : Cg.successors(F)) { const Arc &Arc = *Cg.findArc(F, Succ); CallCounts.push_back({F, Succ, uint64_t(Arc.weight())}); CallOffsets.push_back(uint64_t(Arc.avgCallOffset())); } } // Run the layout algorithm. std::vector Result = codelayout::computeCacheDirectedLayout( FuncSizes, FuncCounts, CallCounts, CallOffsets); // Create a single cluster from the computed order of hot functions. std::vector NodeOrder(Result.begin(), Result.end()); Clusters.emplace_back(Cluster(NodeOrder, Cg)); } break; case RT_PETTIS_HANSEN: Clusters = pettisAndHansen(Cg); break; case RT_RANDOM: std::srand(opts::RandomSeed); Clusters = randomClusters(Cg); break; case RT_USER: { // Build LTOCommonNameMap StringMap> LTOCommonNameMap; for (const BinaryFunction &BF : llvm::make_second_range(BFs)) for (StringRef Name : BF.getNames()) if (std::optional LTOCommonName = getLTOCommonName(Name)) LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); uint32_t Index = 0; uint32_t InvalidEntries = 0; std::vector FunctionNames; if (Error E = readFunctionOrderFile(FunctionNames)) return Error(std::move(E)); for (const std::string &Function : FunctionNames) { std::vector FuncAddrs; BinaryData *BD = BC.getBinaryDataByName(Function); if (!BD) { // If we can't find the main symbol name, look for alternates. uint32_t LocalID = 1; while (true) { const std::string FuncName = Function + "/" + std::to_string(LocalID); BD = BC.getBinaryDataByName(FuncName); if (BD) FuncAddrs.push_back(BD->getAddress()); else break; LocalID++; } // Strip LTO suffixes if (std::optional CommonName = getLTOCommonName(Function)) if (LTOCommonNameMap.contains(*CommonName)) llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); } else { FuncAddrs.push_back(BD->getAddress()); } if (FuncAddrs.empty()) { if (opts::Verbosity >= 1) BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " << "for " << Function << "\n"; ++InvalidEntries; continue; } for (const uint64_t FuncAddr : FuncAddrs) { const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); assert(FuncBD); BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); if (!BF) { if (opts::Verbosity >= 1) BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " << "for " << Function << "\n"; ++InvalidEntries; break; } if (!BF->hasValidIndex()) BF->setIndex(Index++); else if (opts::Verbosity > 0) BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function << "\n"; } } if (InvalidEntries) BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for " << InvalidEntries << " entries in -function-order list\n"; } break; default: llvm_unreachable("unexpected layout type"); } reorder(BC, std::move(Clusters), BFs); BC.HasFinalizedFunctionOrder = true; std::unique_ptr FuncsFile; if (!opts::GenerateFunctionOrderFile.empty()) { FuncsFile = std::make_unique(opts::GenerateFunctionOrderFile, std::ios::out); if (!FuncsFile) { BC.errs() << "BOLT-ERROR: ordered functions file " << opts::GenerateFunctionOrderFile << " cannot be opened\n"; return createFatalBOLTError(""); } } std::unique_ptr LinkSectionsFile; if (!opts::LinkSectionsFile.empty()) { LinkSectionsFile = std::make_unique(opts::LinkSectionsFile, std::ios::out); if (!LinkSectionsFile) { BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile << " cannot be opened\n"; return createFatalBOLTError(""); } } if (FuncsFile || LinkSectionsFile) { std::vector SortedFunctions(BFs.size()); llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), [](BinaryFunction &BF) { return &BF; }); // Sort functions by index. llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex); for (const BinaryFunction *Func : SortedFunctions) { if (!Func->hasValidIndex()) break; if (Func->isPLTFunction()) continue; if (FuncsFile) *FuncsFile << Func->getOneName().str() << '\n'; if (LinkSectionsFile) { const char *Indent = ""; std::vector AllNames = Func->getNames(); llvm::sort(AllNames); for (StringRef Name : AllNames) { const size_t SlashPos = Name.find('/'); if (SlashPos != std::string::npos) { // Avoid duplicates for local functions. if (Name.find('/', SlashPos + 1) != std::string::npos) continue; Name = Name.substr(0, SlashPos); } *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; Indent = " "; } } } if (FuncsFile) { FuncsFile->close(); BC.outs() << "BOLT-INFO: dumped function order to " << opts::GenerateFunctionOrderFile << '\n'; } if (LinkSectionsFile) { LinkSectionsFile->close(); BC.outs() << "BOLT-INFO: dumped linker section order to " << opts::LinkSectionsFile << '\n'; } } return Error::success(); } } // namespace bolt } // namespace llvm