1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements ReorderFunctions class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ReorderFunctions.h" 14 #include "bolt/Passes/HFSort.h" 15 #include "bolt/Utils/Utils.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Transforms/Utils/CodeLayout.h" 19 #include <fstream> 20 21 #define DEBUG_TYPE "hfsort" 22 23 using namespace llvm; 24 25 namespace opts { 26 27 extern cl::OptionCategory BoltOptCategory; 28 extern cl::opt<unsigned> Verbosity; 29 extern cl::opt<uint32_t> RandomSeed; 30 31 extern size_t padFunction(const cl::list<std::string> &Spec, 32 const bolt::BinaryFunction &Function); 33 extern cl::list<std::string> FunctionPadSpec, FunctionPadBeforeSpec; 34 35 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; 36 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions( 37 "reorder-functions", 38 cl::desc("reorder and cluster functions (works only with relocations)"), 39 cl::init(bolt::ReorderFunctions::RT_NONE), 40 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none", 41 "do not reorder functions"), 42 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count", 43 "order by execution count"), 44 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort", 45 "use hfsort algorithm"), 46 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", 47 "use cache-directed sort"), 48 clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort", 49 "use cache-directed sort"), 50 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, 51 "pettis-hansen", "use Pettis-Hansen algorithm"), 52 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", 53 "reorder functions randomly"), 54 clEnumValN(bolt::ReorderFunctions::RT_USER, "user", 55 "use function order specified by -function-order")), 56 cl::ZeroOrMore, cl::cat(BoltOptCategory), 57 cl::callback([](const bolt::ReorderFunctions::ReorderType &option) { 58 if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) { 59 errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated," 60 << " please use '-reorder-functions=cdsort' instead\n"; 61 ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT; 62 } 63 })); 64 65 static cl::opt<bool> ReorderFunctionsUseHotSize( 66 "reorder-functions-use-hot-size", 67 cl::desc("use a function's hot size when doing clustering"), cl::init(true), 68 cl::cat(BoltOptCategory)); 69 70 static cl::opt<std::string> FunctionOrderFile( 71 "function-order", 72 cl::desc("file containing an ordered list of functions to use for function " 73 "reordering"), 74 cl::cat(BoltOptCategory)); 75 76 static cl::opt<std::string> GenerateFunctionOrderFile( 77 "generate-function-order", 78 cl::desc("file to dump the ordered list of functions to use for function " 79 "reordering"), 80 cl::cat(BoltOptCategory)); 81 82 static cl::opt<std::string> LinkSectionsFile( 83 "generate-link-sections", 84 cl::desc("generate a list of function sections in a format suitable for " 85 "inclusion in a linker script"), 86 cl::cat(BoltOptCategory)); 87 88 static cl::opt<bool> 89 UseEdgeCounts("use-edge-counts", 90 cl::desc("use edge count data when doing clustering"), 91 cl::init(true), cl::cat(BoltOptCategory)); 92 93 static cl::opt<bool> CgFromPerfData( 94 "cg-from-perf-data", 95 cl::desc("use perf data directly when constructing the call graph" 96 " for stale functions"), 97 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 98 99 static cl::opt<bool> CgIgnoreRecursiveCalls( 100 "cg-ignore-recursive-calls", 101 cl::desc("ignore recursive calls when constructing the call graph"), 102 cl::init(true), cl::cat(BoltOptCategory)); 103 104 static cl::opt<bool> CgUseSplitHotSize( 105 "cg-use-split-hot-size", 106 cl::desc("use hot/cold data on basic blocks to determine hot sizes for " 107 "call graph functions"), 108 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 109 110 } // namespace opts 111 112 namespace llvm { 113 namespace bolt { 114 115 using NodeId = CallGraph::NodeId; 116 using Arc = CallGraph::Arc; 117 using Node = CallGraph::Node; 118 119 void ReorderFunctions::reorder(BinaryContext &BC, 120 std::vector<Cluster> &&Clusters, 121 std::map<uint64_t, BinaryFunction> &BFs) { 122 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats 123 uint64_t TotalSize = 0; 124 uint32_t Index = 0; 125 126 // Set order of hot functions based on clusters. 127 for (const Cluster &Cluster : Clusters) { 128 for (const NodeId FuncId : Cluster.targets()) { 129 Cg.nodeIdToFunc(FuncId)->setIndex(Index++); 130 FuncAddr[FuncId] = TotalSize; 131 TotalSize += Cg.size(FuncId); 132 } 133 } 134 135 // Assign valid index for functions with valid profile. 136 for (auto &It : BFs) { 137 BinaryFunction &BF = It.second; 138 if (!BF.hasValidIndex() && BF.hasValidProfile()) 139 BF.setIndex(Index++); 140 } 141 142 if (opts::ReorderFunctions == RT_NONE) 143 return; 144 145 printStats(BC, Clusters, FuncAddr); 146 } 147 148 void ReorderFunctions::printStats(BinaryContext &BC, 149 const std::vector<Cluster> &Clusters, 150 const std::vector<uint64_t> &FuncAddr) { 151 if (opts::Verbosity == 0) { 152 #ifndef NDEBUG 153 if (!DebugFlag || !isCurrentDebugType("hfsort")) 154 return; 155 #else 156 return; 157 #endif 158 } 159 160 bool PrintDetailed = opts::Verbosity > 1; 161 #ifndef NDEBUG 162 PrintDetailed |= 163 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); 164 #endif 165 uint64_t TotalSize = 0; 166 uint64_t CurPage = 0; 167 uint64_t Hotfuncs = 0; 168 double TotalDistance = 0; 169 double TotalCalls = 0; 170 double TotalCalls64B = 0; 171 double TotalCalls4KB = 0; 172 double TotalCalls2MB = 0; 173 if (PrintDetailed) 174 BC.outs() << "BOLT-INFO: Function reordering page layout\n" 175 << "BOLT-INFO: ============== page 0 ==============\n"; 176 for (const Cluster &Cluster : Clusters) { 177 if (PrintDetailed) 178 BC.outs() << format( 179 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", 180 Cluster.density(), Cluster.samples(), Cluster.size()); 181 182 for (NodeId FuncId : Cluster.targets()) { 183 if (Cg.samples(FuncId) > 0) { 184 Hotfuncs++; 185 186 if (PrintDetailed) 187 BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) 188 << " (" << Cg.size(FuncId) << ")\n"; 189 190 uint64_t Dist = 0; 191 uint64_t Calls = 0; 192 for (NodeId Dst : Cg.successors(FuncId)) { 193 if (FuncId == Dst) // ignore recursive calls in stats 194 continue; 195 const Arc &Arc = *Cg.findArc(FuncId, Dst); 196 const auto D = std::abs(FuncAddr[Arc.dst()] - 197 (FuncAddr[FuncId] + Arc.avgCallOffset())); 198 const double W = Arc.weight(); 199 if (D < 64 && PrintDetailed && opts::Verbosity > 2) 200 BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n" 201 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) 202 << "\n" 203 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" 204 << "BOLT-INFO: Weight = " << W << "\n" 205 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() 206 << "\n"; 207 Calls += W; 208 if (D < 64) 209 TotalCalls64B += W; 210 if (D < 4096) 211 TotalCalls4KB += W; 212 if (D < (2 << 20)) 213 TotalCalls2MB += W; 214 Dist += Arc.weight() * D; 215 if (PrintDetailed) 216 BC.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " 217 "weight = %.0lf, callDist = %f\n", 218 Arc.src(), FuncAddr[Arc.src()], 219 Arc.avgCallOffset(), Arc.dst(), 220 FuncAddr[Arc.dst()], Arc.weight(), D); 221 } 222 TotalCalls += Calls; 223 TotalDistance += Dist; 224 TotalSize += Cg.size(FuncId); 225 226 if (PrintDetailed) { 227 BC.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", 228 TotalSize, Calls ? Dist / Calls : 0) 229 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; 230 const uint64_t NewPage = TotalSize / HugePageSize; 231 if (NewPage != CurPage) { 232 CurPage = NewPage; 233 BC.outs() << format( 234 "BOLT-INFO: ============== page %u ==============\n", CurPage); 235 } 236 } 237 } 238 } 239 } 240 BC.outs() << "BOLT-INFO: Function reordering stats\n" 241 << format("BOLT-INFO: Number of hot functions: %u\n" 242 "BOLT-INFO: Number of clusters: %lu\n", 243 Hotfuncs, Clusters.size()) 244 << format("BOLT-INFO: Final average call distance = %.1lf " 245 "(%.0lf / %.0lf)\n", 246 TotalCalls ? TotalDistance / TotalCalls : 0, 247 TotalDistance, TotalCalls) 248 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); 249 if (TotalCalls) 250 BC.outs() 251 << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", 252 TotalCalls64B, 100 * TotalCalls64B / TotalCalls) 253 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", 254 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) 255 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", 256 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); 257 } 258 259 Error ReorderFunctions::readFunctionOrderFile( 260 std::vector<std::string> &FunctionNames) { 261 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); 262 if (!FuncsFile) 263 return createFatalBOLTError(Twine("Ordered functions file \"") + 264 Twine(opts::FunctionOrderFile) + 265 Twine("\" can't be opened.")); 266 267 std::string FuncName; 268 while (std::getline(FuncsFile, FuncName)) 269 FunctionNames.push_back(FuncName); 270 return Error::success(); 271 } 272 273 Error ReorderFunctions::runOnFunctions(BinaryContext &BC) { 274 auto &BFs = BC.getBinaryFunctions(); 275 if (opts::ReorderFunctions != RT_NONE && 276 opts::ReorderFunctions != RT_EXEC_COUNT && 277 opts::ReorderFunctions != RT_USER) { 278 Cg = buildCallGraph( 279 BC, 280 [](const BinaryFunction &BF) { 281 if (!BF.hasProfile()) 282 return true; 283 if (BF.getState() != BinaryFunction::State::CFG) 284 return true; 285 return false; 286 }, 287 opts::CgFromPerfData, 288 /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize, 289 opts::CgUseSplitHotSize, opts::UseEdgeCounts, 290 opts::CgIgnoreRecursiveCalls); 291 Cg.normalizeArcWeights(); 292 } 293 294 std::vector<Cluster> Clusters; 295 296 switch (opts::ReorderFunctions) { 297 case RT_NONE: 298 break; 299 case RT_EXEC_COUNT: { 300 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 301 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 302 [](BinaryFunction &BF) { return &BF; }); 303 llvm::stable_sort(SortedFunctions, 304 [&](const BinaryFunction *A, const BinaryFunction *B) { 305 if (A->isIgnored()) 306 return false; 307 if (B->isIgnored()) 308 return true; 309 const size_t PadA = 310 opts::padFunction(opts::FunctionPadSpec, *A) + 311 opts::padFunction(opts::FunctionPadBeforeSpec, *A); 312 const size_t PadB = 313 opts::padFunction(opts::FunctionPadSpec, *B) + 314 opts::padFunction(opts::FunctionPadBeforeSpec, *B); 315 if (!PadA || !PadB) { 316 if (PadA) 317 return true; 318 if (PadB) 319 return false; 320 } 321 if (!A->hasProfile()) 322 return false; 323 if (!B->hasProfile()) 324 return true; 325 return A->getExecutionCount() > B->getExecutionCount(); 326 }); 327 uint32_t Index = 0; 328 for (BinaryFunction *BF : SortedFunctions) 329 if (BF->hasProfile()) { 330 BF->setIndex(Index++); 331 LLVM_DEBUG(if (opts::Verbosity > 1) { 332 dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " (" 333 << BF->getExecutionCount() << ")\n"; 334 }); 335 } 336 } break; 337 case RT_HFSORT: 338 Clusters = clusterize(Cg); 339 break; 340 case RT_CDSORT: { 341 // It is required that the sum of incoming arc weights is not greater 342 // than the number of samples for every function. Ensuring the call graph 343 // obeys the property before running the algorithm. 344 Cg.adjustArcWeights(); 345 346 // Initialize CFG nodes and their data 347 std::vector<uint64_t> FuncSizes; 348 std::vector<uint64_t> FuncCounts; 349 std::vector<codelayout::EdgeCount> CallCounts; 350 std::vector<uint64_t> CallOffsets; 351 for (NodeId F = 0; F < Cg.numNodes(); ++F) { 352 FuncSizes.push_back(Cg.size(F)); 353 FuncCounts.push_back(Cg.samples(F)); 354 for (NodeId Succ : Cg.successors(F)) { 355 const Arc &Arc = *Cg.findArc(F, Succ); 356 CallCounts.push_back({F, Succ, uint64_t(Arc.weight())}); 357 CallOffsets.push_back(uint64_t(Arc.avgCallOffset())); 358 } 359 } 360 361 // Run the layout algorithm. 362 std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout( 363 FuncSizes, FuncCounts, CallCounts, CallOffsets); 364 365 // Create a single cluster from the computed order of hot functions. 366 std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end()); 367 Clusters.emplace_back(Cluster(NodeOrder, Cg)); 368 } break; 369 case RT_PETTIS_HANSEN: 370 Clusters = pettisAndHansen(Cg); 371 break; 372 case RT_RANDOM: 373 std::srand(opts::RandomSeed); 374 Clusters = randomClusters(Cg); 375 break; 376 case RT_USER: { 377 // Build LTOCommonNameMap 378 StringMap<std::vector<uint64_t>> LTOCommonNameMap; 379 for (const BinaryFunction &BF : llvm::make_second_range(BFs)) 380 for (StringRef Name : BF.getNames()) 381 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name)) 382 LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); 383 384 uint32_t Index = 0; 385 uint32_t InvalidEntries = 0; 386 std::vector<std::string> FunctionNames; 387 if (Error E = readFunctionOrderFile(FunctionNames)) 388 return Error(std::move(E)); 389 390 for (const std::string &Function : FunctionNames) { 391 std::vector<uint64_t> FuncAddrs; 392 393 BinaryData *BD = BC.getBinaryDataByName(Function); 394 if (!BD) { 395 // If we can't find the main symbol name, look for alternates. 396 uint32_t LocalID = 1; 397 while (true) { 398 const std::string FuncName = Function + "/" + std::to_string(LocalID); 399 BD = BC.getBinaryDataByName(FuncName); 400 if (BD) 401 FuncAddrs.push_back(BD->getAddress()); 402 else 403 break; 404 LocalID++; 405 } 406 // Strip LTO suffixes 407 if (std::optional<StringRef> CommonName = getLTOCommonName(Function)) 408 if (LTOCommonNameMap.contains(*CommonName)) 409 llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); 410 } else { 411 FuncAddrs.push_back(BD->getAddress()); 412 } 413 414 if (FuncAddrs.empty()) { 415 if (opts::Verbosity >= 1) 416 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " 417 << "for " << Function << "\n"; 418 ++InvalidEntries; 419 continue; 420 } 421 422 for (const uint64_t FuncAddr : FuncAddrs) { 423 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); 424 assert(FuncBD); 425 426 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); 427 if (!BF) { 428 if (opts::Verbosity >= 1) 429 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " 430 << "for " << Function << "\n"; 431 ++InvalidEntries; 432 break; 433 } 434 if (!BF->hasValidIndex()) 435 BF->setIndex(Index++); 436 else if (opts::Verbosity > 0) 437 BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function 438 << "\n"; 439 } 440 } 441 if (InvalidEntries) 442 BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for " 443 << InvalidEntries << " entries in -function-order list\n"; 444 } break; 445 446 default: 447 llvm_unreachable("unexpected layout type"); 448 } 449 450 reorder(BC, std::move(Clusters), BFs); 451 452 BC.HasFinalizedFunctionOrder = true; 453 454 std::unique_ptr<std::ofstream> FuncsFile; 455 if (!opts::GenerateFunctionOrderFile.empty()) { 456 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, 457 std::ios::out); 458 if (!FuncsFile) { 459 BC.errs() << "BOLT-ERROR: ordered functions file " 460 << opts::GenerateFunctionOrderFile << " cannot be opened\n"; 461 return createFatalBOLTError(""); 462 } 463 } 464 465 std::unique_ptr<std::ofstream> LinkSectionsFile; 466 if (!opts::LinkSectionsFile.empty()) { 467 LinkSectionsFile = 468 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out); 469 if (!LinkSectionsFile) { 470 BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile 471 << " cannot be opened\n"; 472 return createFatalBOLTError(""); 473 } 474 } 475 476 if (FuncsFile || LinkSectionsFile) { 477 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 478 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 479 [](BinaryFunction &BF) { return &BF; }); 480 481 // Sort functions by index. 482 llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex); 483 484 for (const BinaryFunction *Func : SortedFunctions) { 485 if (!Func->hasValidIndex()) 486 break; 487 if (Func->isPLTFunction()) 488 continue; 489 490 if (FuncsFile) 491 *FuncsFile << Func->getOneName().str() << '\n'; 492 493 if (LinkSectionsFile) { 494 const char *Indent = ""; 495 std::vector<StringRef> AllNames = Func->getNames(); 496 llvm::sort(AllNames); 497 for (StringRef Name : AllNames) { 498 const size_t SlashPos = Name.find('/'); 499 if (SlashPos != std::string::npos) { 500 // Avoid duplicates for local functions. 501 if (Name.find('/', SlashPos + 1) != std::string::npos) 502 continue; 503 Name = Name.substr(0, SlashPos); 504 } 505 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; 506 Indent = " "; 507 } 508 } 509 } 510 511 if (FuncsFile) { 512 FuncsFile->close(); 513 BC.outs() << "BOLT-INFO: dumped function order to " 514 << opts::GenerateFunctionOrderFile << '\n'; 515 } 516 517 if (LinkSectionsFile) { 518 LinkSectionsFile->close(); 519 BC.outs() << "BOLT-INFO: dumped linker section order to " 520 << opts::LinkSectionsFile << '\n'; 521 } 522 } 523 return Error::success(); 524 } 525 526 } // namespace bolt 527 } // namespace llvm 528