1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements ReorderFunctions class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ReorderFunctions.h" 14 #include "bolt/Passes/HFSort.h" 15 #include "bolt/Utils/Utils.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Transforms/Utils/CodeLayout.h" 19 #include <fstream> 20 21 #define DEBUG_TYPE "hfsort" 22 23 using namespace llvm; 24 25 namespace opts { 26 27 extern cl::OptionCategory BoltOptCategory; 28 extern cl::opt<unsigned> Verbosity; 29 extern cl::opt<uint32_t> RandomSeed; 30 31 extern size_t padFunction(const bolt::BinaryFunction &Function); 32 33 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions( 34 "reorder-functions", 35 cl::desc("reorder and cluster functions (works only with relocations)"), 36 cl::init(bolt::ReorderFunctions::RT_NONE), 37 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none", 38 "do not reorder functions"), 39 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count", 40 "order by execution count"), 41 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort", 42 "use hfsort algorithm"), 43 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", 44 "use hfsort+ algorithm"), 45 clEnumValN(bolt::ReorderFunctions::RT_CDS, "cds", 46 "use cache-directed sort"), 47 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, 48 "pettis-hansen", "use Pettis-Hansen algorithm"), 49 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", 50 "reorder functions randomly"), 51 clEnumValN(bolt::ReorderFunctions::RT_USER, "user", 52 "use function order specified by -function-order")), 53 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 54 55 static cl::opt<bool> ReorderFunctionsUseHotSize( 56 "reorder-functions-use-hot-size", 57 cl::desc("use a function's hot size when doing clustering"), cl::init(true), 58 cl::cat(BoltOptCategory)); 59 60 static cl::opt<std::string> FunctionOrderFile( 61 "function-order", 62 cl::desc("file containing an ordered list of functions to use for function " 63 "reordering"), 64 cl::cat(BoltOptCategory)); 65 66 static cl::opt<std::string> GenerateFunctionOrderFile( 67 "generate-function-order", 68 cl::desc("file to dump the ordered list of functions to use for function " 69 "reordering"), 70 cl::cat(BoltOptCategory)); 71 72 static cl::opt<std::string> LinkSectionsFile( 73 "generate-link-sections", 74 cl::desc("generate a list of function sections in a format suitable for " 75 "inclusion in a linker script"), 76 cl::cat(BoltOptCategory)); 77 78 static cl::opt<bool> 79 UseEdgeCounts("use-edge-counts", 80 cl::desc("use edge count data when doing clustering"), 81 cl::init(true), cl::cat(BoltOptCategory)); 82 83 static cl::opt<bool> CgFromPerfData( 84 "cg-from-perf-data", 85 cl::desc("use perf data directly when constructing the call graph" 86 " for stale functions"), 87 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 88 89 static cl::opt<bool> CgIgnoreRecursiveCalls( 90 "cg-ignore-recursive-calls", 91 cl::desc("ignore recursive calls when constructing the call graph"), 92 cl::init(true), cl::cat(BoltOptCategory)); 93 94 static cl::opt<bool> CgUseSplitHotSize( 95 "cg-use-split-hot-size", 96 cl::desc("use hot/cold data on basic blocks to determine hot sizes for " 97 "call graph functions"), 98 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 99 100 } // namespace opts 101 102 namespace llvm { 103 namespace bolt { 104 105 using NodeId = CallGraph::NodeId; 106 using Arc = CallGraph::Arc; 107 using Node = CallGraph::Node; 108 109 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters, 110 std::map<uint64_t, BinaryFunction> &BFs) { 111 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats 112 uint64_t TotalSize = 0; 113 uint32_t Index = 0; 114 115 // Set order of hot functions based on clusters. 116 for (const Cluster &Cluster : Clusters) { 117 for (const NodeId FuncId : Cluster.targets()) { 118 Cg.nodeIdToFunc(FuncId)->setIndex(Index++); 119 FuncAddr[FuncId] = TotalSize; 120 TotalSize += Cg.size(FuncId); 121 } 122 } 123 124 // Assign valid index for functions with valid profile. 125 for (auto &It : BFs) { 126 BinaryFunction &BF = It.second; 127 if (!BF.hasValidIndex() && BF.hasValidProfile()) 128 BF.setIndex(Index++); 129 } 130 131 if (opts::ReorderFunctions == RT_NONE) 132 return; 133 134 printStats(Clusters, FuncAddr); 135 } 136 137 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters, 138 const std::vector<uint64_t> &FuncAddr) { 139 if (opts::Verbosity == 0) { 140 #ifndef NDEBUG 141 if (!DebugFlag || !isCurrentDebugType("hfsort")) 142 return; 143 #else 144 return; 145 #endif 146 } 147 148 bool PrintDetailed = opts::Verbosity > 1; 149 #ifndef NDEBUG 150 PrintDetailed |= 151 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); 152 #endif 153 uint64_t TotalSize = 0; 154 uint64_t CurPage = 0; 155 uint64_t Hotfuncs = 0; 156 double TotalDistance = 0; 157 double TotalCalls = 0; 158 double TotalCalls64B = 0; 159 double TotalCalls4KB = 0; 160 double TotalCalls2MB = 0; 161 if (PrintDetailed) 162 outs() << "BOLT-INFO: Function reordering page layout\n" 163 << "BOLT-INFO: ============== page 0 ==============\n"; 164 for (const Cluster &Cluster : Clusters) { 165 if (PrintDetailed) 166 outs() << format( 167 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", 168 Cluster.density(), Cluster.samples(), Cluster.size()); 169 170 for (NodeId FuncId : Cluster.targets()) { 171 if (Cg.samples(FuncId) > 0) { 172 Hotfuncs++; 173 174 if (PrintDetailed) 175 outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " (" 176 << Cg.size(FuncId) << ")\n"; 177 178 uint64_t Dist = 0; 179 uint64_t Calls = 0; 180 for (NodeId Dst : Cg.successors(FuncId)) { 181 if (FuncId == Dst) // ignore recursive calls in stats 182 continue; 183 const Arc &Arc = *Cg.findArc(FuncId, Dst); 184 const auto D = std::abs(FuncAddr[Arc.dst()] - 185 (FuncAddr[FuncId] + Arc.avgCallOffset())); 186 const double W = Arc.weight(); 187 if (D < 64 && PrintDetailed && opts::Verbosity > 2) 188 outs() << "BOLT-INFO: short (" << D << "B) call:\n" 189 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n" 190 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" 191 << "BOLT-INFO: Weight = " << W << "\n" 192 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() 193 << "\n"; 194 Calls += W; 195 if (D < 64) 196 TotalCalls64B += W; 197 if (D < 4096) 198 TotalCalls4KB += W; 199 if (D < (2 << 20)) 200 TotalCalls2MB += W; 201 Dist += Arc.weight() * D; 202 if (PrintDetailed) 203 outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " 204 "weight = %.0lf, callDist = %f\n", 205 Arc.src(), FuncAddr[Arc.src()], 206 Arc.avgCallOffset(), Arc.dst(), 207 FuncAddr[Arc.dst()], Arc.weight(), D); 208 } 209 TotalCalls += Calls; 210 TotalDistance += Dist; 211 TotalSize += Cg.size(FuncId); 212 213 if (PrintDetailed) { 214 outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", 215 TotalSize, Calls ? Dist / Calls : 0) 216 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; 217 const uint64_t NewPage = TotalSize / HugePageSize; 218 if (NewPage != CurPage) { 219 CurPage = NewPage; 220 outs() << format( 221 "BOLT-INFO: ============== page %u ==============\n", CurPage); 222 } 223 } 224 } 225 } 226 } 227 outs() << "BOLT-INFO: Function reordering stats\n" 228 << format("BOLT-INFO: Number of hot functions: %u\n" 229 "BOLT-INFO: Number of clusters: %lu\n", 230 Hotfuncs, Clusters.size()) 231 << format("BOLT-INFO: Final average call distance = %.1lf " 232 "(%.0lf / %.0lf)\n", 233 TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance, 234 TotalCalls) 235 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); 236 if (TotalCalls) 237 outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", 238 TotalCalls64B, 100 * TotalCalls64B / TotalCalls) 239 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", 240 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) 241 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", 242 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); 243 } 244 245 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() { 246 std::vector<std::string> FunctionNames; 247 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); 248 if (!FuncsFile) { 249 errs() << "Ordered functions file \"" << opts::FunctionOrderFile 250 << "\" can't be opened.\n"; 251 exit(1); 252 } 253 std::string FuncName; 254 while (std::getline(FuncsFile, FuncName)) 255 FunctionNames.push_back(FuncName); 256 return FunctionNames; 257 } 258 259 void ReorderFunctions::runOnFunctions(BinaryContext &BC) { 260 auto &BFs = BC.getBinaryFunctions(); 261 if (opts::ReorderFunctions != RT_NONE && 262 opts::ReorderFunctions != RT_EXEC_COUNT && 263 opts::ReorderFunctions != RT_USER) { 264 Cg = buildCallGraph( 265 BC, 266 [](const BinaryFunction &BF) { 267 if (!BF.hasProfile()) 268 return true; 269 if (BF.getState() != BinaryFunction::State::CFG) 270 return true; 271 return false; 272 }, 273 opts::CgFromPerfData, 274 /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize, 275 opts::CgUseSplitHotSize, opts::UseEdgeCounts, 276 opts::CgIgnoreRecursiveCalls); 277 Cg.normalizeArcWeights(); 278 } 279 280 std::vector<Cluster> Clusters; 281 282 switch (opts::ReorderFunctions) { 283 case RT_NONE: 284 break; 285 case RT_EXEC_COUNT: { 286 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 287 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 288 [](BinaryFunction &BF) { return &BF; }); 289 llvm::stable_sort(SortedFunctions, 290 [&](const BinaryFunction *A, const BinaryFunction *B) { 291 if (A->isIgnored()) 292 return false; 293 if (B->isIgnored()) 294 return true; 295 const size_t PadA = opts::padFunction(*A); 296 const size_t PadB = opts::padFunction(*B); 297 if (!PadA || !PadB) { 298 if (PadA) 299 return true; 300 if (PadB) 301 return false; 302 } 303 if (!A->hasProfile()) 304 return false; 305 if (!B->hasProfile()) 306 return true; 307 return A->getExecutionCount() > B->getExecutionCount(); 308 }); 309 uint32_t Index = 0; 310 for (BinaryFunction *BF : SortedFunctions) 311 if (BF->hasProfile()) { 312 BF->setIndex(Index++); 313 LLVM_DEBUG(if (opts::Verbosity > 1) { 314 dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " (" 315 << BF->getExecutionCount() << ")\n"; 316 }); 317 } 318 } break; 319 case RT_HFSORT: 320 Clusters = clusterize(Cg); 321 break; 322 case RT_HFSORT_PLUS: 323 Clusters = hfsortPlus(Cg); 324 break; 325 case RT_CDS: { 326 // It is required that the sum of incoming arc weights is not greater 327 // than the number of samples for every function. Ensuring the call graph 328 // obeys the property before running the algorithm. 329 Cg.adjustArcWeights(); 330 331 // Initialize CFG nodes and their data 332 std::vector<uint64_t> FuncSizes; 333 std::vector<uint64_t> FuncCounts; 334 using JumpT = std::pair<uint64_t, uint64_t>; 335 std::vector<std::pair<JumpT, uint64_t>> CallCounts; 336 std::vector<uint64_t> CallOffsets; 337 for (NodeId F = 0; F < Cg.numNodes(); ++F) { 338 FuncSizes.push_back(Cg.size(F)); 339 FuncCounts.push_back(Cg.samples(F)); 340 for (NodeId Succ : Cg.successors(F)) { 341 const Arc &Arc = *Cg.findArc(F, Succ); 342 auto It = std::make_pair(F, Succ); 343 CallCounts.push_back(std::make_pair(It, Arc.weight())); 344 CallOffsets.push_back(uint64_t(Arc.avgCallOffset())); 345 } 346 } 347 348 // Run the layout algorithm. 349 std::vector<uint64_t> Result = 350 applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets); 351 352 // Create a single cluster from the computed order of hot functions. 353 std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end()); 354 Clusters.emplace_back(Cluster(NodeOrder, Cg)); 355 } break; 356 case RT_PETTIS_HANSEN: 357 Clusters = pettisAndHansen(Cg); 358 break; 359 case RT_RANDOM: 360 std::srand(opts::RandomSeed); 361 Clusters = randomClusters(Cg); 362 break; 363 case RT_USER: { 364 // Build LTOCommonNameMap 365 StringMap<std::vector<uint64_t>> LTOCommonNameMap; 366 for (const BinaryFunction &BF : llvm::make_second_range(BFs)) 367 for (StringRef Name : BF.getNames()) 368 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name)) 369 LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); 370 371 uint32_t Index = 0; 372 uint32_t InvalidEntries = 0; 373 for (const std::string &Function : readFunctionOrderFile()) { 374 std::vector<uint64_t> FuncAddrs; 375 376 BinaryData *BD = BC.getBinaryDataByName(Function); 377 if (!BD) { 378 // If we can't find the main symbol name, look for alternates. 379 uint32_t LocalID = 1; 380 while (true) { 381 const std::string FuncName = Function + "/" + std::to_string(LocalID); 382 BD = BC.getBinaryDataByName(FuncName); 383 if (BD) 384 FuncAddrs.push_back(BD->getAddress()); 385 else 386 break; 387 LocalID++; 388 } 389 // Strip LTO suffixes 390 if (std::optional<StringRef> CommonName = getLTOCommonName(Function)) 391 if (LTOCommonNameMap.contains(*CommonName)) 392 llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); 393 } else { 394 FuncAddrs.push_back(BD->getAddress()); 395 } 396 397 if (FuncAddrs.empty()) { 398 if (opts::Verbosity >= 1) 399 errs() << "BOLT-WARNING: Reorder functions: can't find function " 400 << "for " << Function << "\n"; 401 ++InvalidEntries; 402 continue; 403 } 404 405 for (const uint64_t FuncAddr : FuncAddrs) { 406 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); 407 assert(FuncBD); 408 409 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); 410 if (!BF) { 411 if (opts::Verbosity >= 1) 412 errs() << "BOLT-WARNING: Reorder functions: can't find function " 413 << "for " << Function << "\n"; 414 ++InvalidEntries; 415 break; 416 } 417 if (!BF->hasValidIndex()) 418 BF->setIndex(Index++); 419 else if (opts::Verbosity > 0) 420 errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function 421 << "\n"; 422 } 423 } 424 if (InvalidEntries) 425 errs() << "BOLT-WARNING: Reorder functions: can't find functions for " 426 << InvalidEntries << " entries in -function-order list\n"; 427 } break; 428 } 429 430 reorder(std::move(Clusters), BFs); 431 432 std::unique_ptr<std::ofstream> FuncsFile; 433 if (!opts::GenerateFunctionOrderFile.empty()) { 434 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, 435 std::ios::out); 436 if (!FuncsFile) { 437 errs() << "BOLT-ERROR: ordered functions file " 438 << opts::GenerateFunctionOrderFile << " cannot be opened\n"; 439 exit(1); 440 } 441 } 442 443 std::unique_ptr<std::ofstream> LinkSectionsFile; 444 if (!opts::LinkSectionsFile.empty()) { 445 LinkSectionsFile = 446 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out); 447 if (!LinkSectionsFile) { 448 errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile 449 << " cannot be opened\n"; 450 exit(1); 451 } 452 } 453 454 if (FuncsFile || LinkSectionsFile) { 455 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 456 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 457 [](BinaryFunction &BF) { return &BF; }); 458 459 // Sort functions by index. 460 llvm::stable_sort(SortedFunctions, 461 [](const BinaryFunction *A, const BinaryFunction *B) { 462 if (A->hasValidIndex() && B->hasValidIndex()) 463 return A->getIndex() < B->getIndex(); 464 if (A->hasValidIndex() && !B->hasValidIndex()) 465 return true; 466 if (!A->hasValidIndex() && B->hasValidIndex()) 467 return false; 468 return A->getAddress() < B->getAddress(); 469 }); 470 471 for (const BinaryFunction *Func : SortedFunctions) { 472 if (!Func->hasValidIndex()) 473 break; 474 if (Func->isPLTFunction()) 475 continue; 476 477 if (FuncsFile) 478 *FuncsFile << Func->getOneName().str() << '\n'; 479 480 if (LinkSectionsFile) { 481 const char *Indent = ""; 482 std::vector<StringRef> AllNames = Func->getNames(); 483 llvm::sort(AllNames); 484 for (StringRef Name : AllNames) { 485 const size_t SlashPos = Name.find('/'); 486 if (SlashPos != std::string::npos) { 487 // Avoid duplicates for local functions. 488 if (Name.find('/', SlashPos + 1) != std::string::npos) 489 continue; 490 Name = Name.substr(0, SlashPos); 491 } 492 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; 493 Indent = " "; 494 } 495 } 496 } 497 498 if (FuncsFile) { 499 FuncsFile->close(); 500 outs() << "BOLT-INFO: dumped function order to " 501 << opts::GenerateFunctionOrderFile << '\n'; 502 } 503 504 if (LinkSectionsFile) { 505 LinkSectionsFile->close(); 506 outs() << "BOLT-INFO: dumped linker section order to " 507 << opts::LinkSectionsFile << '\n'; 508 } 509 } 510 } 511 512 } // namespace bolt 513 } // namespace llvm 514