1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements ReorderFunctions class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ReorderFunctions.h" 14 #include "bolt/Passes/HFSort.h" 15 #include "bolt/Utils/Utils.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Transforms/Utils/CodeLayout.h" 19 #include <fstream> 20 21 #define DEBUG_TYPE "hfsort" 22 23 using namespace llvm; 24 25 namespace opts { 26 27 extern cl::OptionCategory BoltOptCategory; 28 extern cl::opt<unsigned> Verbosity; 29 extern cl::opt<uint32_t> RandomSeed; 30 31 extern size_t padFunctionBefore(const bolt::BinaryFunction &Function); 32 extern size_t padFunctionAfter(const bolt::BinaryFunction &Function); 33 34 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; 35 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions( 36 "reorder-functions", 37 cl::desc("reorder and cluster functions (works only with relocations)"), 38 cl::init(bolt::ReorderFunctions::RT_NONE), 39 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none", 40 "do not reorder functions"), 41 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count", 42 "order by execution count"), 43 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort", 44 "use hfsort algorithm"), 45 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", 46 "use cache-directed sort"), 47 clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort", 48 "use cache-directed sort"), 49 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, 50 "pettis-hansen", "use Pettis-Hansen algorithm"), 51 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", 52 "reorder functions randomly"), 53 clEnumValN(bolt::ReorderFunctions::RT_USER, "user", 54 "use function order specified by -function-order")), 55 cl::ZeroOrMore, cl::cat(BoltOptCategory), 56 cl::callback([](const bolt::ReorderFunctions::ReorderType &option) { 57 if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) { 58 errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated," 59 << " please use '-reorder-functions=cdsort' instead\n"; 60 ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT; 61 } 62 })); 63 64 static cl::opt<bool> ReorderFunctionsUseHotSize( 65 "reorder-functions-use-hot-size", 66 cl::desc("use a function's hot size when doing clustering"), cl::init(true), 67 cl::cat(BoltOptCategory)); 68 69 static cl::opt<std::string> FunctionOrderFile( 70 "function-order", 71 cl::desc("file containing an ordered list of functions to use for function " 72 "reordering"), 73 cl::cat(BoltOptCategory)); 74 75 static cl::opt<std::string> GenerateFunctionOrderFile( 76 "generate-function-order", 77 cl::desc("file to dump the ordered list of functions to use for function " 78 "reordering"), 79 cl::cat(BoltOptCategory)); 80 81 static cl::opt<std::string> LinkSectionsFile( 82 "generate-link-sections", 83 cl::desc("generate a list of function sections in a format suitable for " 84 "inclusion in a linker script"), 85 cl::cat(BoltOptCategory)); 86 87 static cl::opt<bool> 88 UseEdgeCounts("use-edge-counts", 89 cl::desc("use edge count data when doing clustering"), 90 cl::init(true), cl::cat(BoltOptCategory)); 91 92 static cl::opt<bool> CgFromPerfData( 93 "cg-from-perf-data", 94 cl::desc("use perf data directly when constructing the call graph" 95 " for stale functions"), 96 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 97 98 static cl::opt<bool> CgIgnoreRecursiveCalls( 99 "cg-ignore-recursive-calls", 100 cl::desc("ignore recursive calls when constructing the call graph"), 101 cl::init(true), cl::cat(BoltOptCategory)); 102 103 static cl::opt<bool> CgUseSplitHotSize( 104 "cg-use-split-hot-size", 105 cl::desc("use hot/cold data on basic blocks to determine hot sizes for " 106 "call graph functions"), 107 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 108 109 } // namespace opts 110 111 namespace llvm { 112 namespace bolt { 113 114 using NodeId = CallGraph::NodeId; 115 using Arc = CallGraph::Arc; 116 using Node = CallGraph::Node; 117 118 void ReorderFunctions::reorder(BinaryContext &BC, 119 std::vector<Cluster> &&Clusters, 120 std::map<uint64_t, BinaryFunction> &BFs) { 121 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats 122 uint64_t TotalSize = 0; 123 uint32_t Index = 0; 124 125 // Set order of hot functions based on clusters. 126 for (const Cluster &Cluster : Clusters) { 127 for (const NodeId FuncId : Cluster.targets()) { 128 Cg.nodeIdToFunc(FuncId)->setIndex(Index++); 129 FuncAddr[FuncId] = TotalSize; 130 TotalSize += Cg.size(FuncId); 131 } 132 } 133 134 // Assign valid index for functions with valid profile. 135 for (auto &It : BFs) { 136 BinaryFunction &BF = It.second; 137 if (!BF.hasValidIndex() && BF.hasValidProfile()) 138 BF.setIndex(Index++); 139 } 140 141 if (opts::ReorderFunctions == RT_NONE) 142 return; 143 144 printStats(BC, Clusters, FuncAddr); 145 } 146 147 void ReorderFunctions::printStats(BinaryContext &BC, 148 const std::vector<Cluster> &Clusters, 149 const std::vector<uint64_t> &FuncAddr) { 150 if (opts::Verbosity == 0) { 151 #ifndef NDEBUG 152 if (!DebugFlag || !isCurrentDebugType("hfsort")) 153 return; 154 #else 155 return; 156 #endif 157 } 158 159 bool PrintDetailed = opts::Verbosity > 1; 160 #ifndef NDEBUG 161 PrintDetailed |= 162 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); 163 #endif 164 uint64_t TotalSize = 0; 165 uint64_t CurPage = 0; 166 uint64_t Hotfuncs = 0; 167 double TotalDistance = 0; 168 double TotalCalls = 0; 169 double TotalCalls64B = 0; 170 double TotalCalls4KB = 0; 171 double TotalCalls2MB = 0; 172 if (PrintDetailed) 173 BC.outs() << "BOLT-INFO: Function reordering page layout\n" 174 << "BOLT-INFO: ============== page 0 ==============\n"; 175 for (const Cluster &Cluster : Clusters) { 176 if (PrintDetailed) 177 BC.outs() << format( 178 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", 179 Cluster.density(), Cluster.samples(), Cluster.size()); 180 181 for (NodeId FuncId : Cluster.targets()) { 182 if (Cg.samples(FuncId) > 0) { 183 Hotfuncs++; 184 185 if (PrintDetailed) 186 BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) 187 << " (" << Cg.size(FuncId) << ")\n"; 188 189 uint64_t Dist = 0; 190 uint64_t Calls = 0; 191 for (NodeId Dst : Cg.successors(FuncId)) { 192 if (FuncId == Dst) // ignore recursive calls in stats 193 continue; 194 const Arc &Arc = *Cg.findArc(FuncId, Dst); 195 const auto D = std::abs(FuncAddr[Arc.dst()] - 196 (FuncAddr[FuncId] + Arc.avgCallOffset())); 197 const double W = Arc.weight(); 198 if (D < 64 && PrintDetailed && opts::Verbosity > 2) 199 BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n" 200 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) 201 << "\n" 202 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" 203 << "BOLT-INFO: Weight = " << W << "\n" 204 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() 205 << "\n"; 206 Calls += W; 207 if (D < 64) 208 TotalCalls64B += W; 209 if (D < 4096) 210 TotalCalls4KB += W; 211 if (D < (2 << 20)) 212 TotalCalls2MB += W; 213 Dist += Arc.weight() * D; 214 if (PrintDetailed) 215 BC.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " 216 "weight = %.0lf, callDist = %f\n", 217 Arc.src(), FuncAddr[Arc.src()], 218 Arc.avgCallOffset(), Arc.dst(), 219 FuncAddr[Arc.dst()], Arc.weight(), D); 220 } 221 TotalCalls += Calls; 222 TotalDistance += Dist; 223 TotalSize += Cg.size(FuncId); 224 225 if (PrintDetailed) { 226 BC.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", 227 TotalSize, Calls ? Dist / Calls : 0) 228 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; 229 const uint64_t NewPage = TotalSize / HugePageSize; 230 if (NewPage != CurPage) { 231 CurPage = NewPage; 232 BC.outs() << format( 233 "BOLT-INFO: ============== page %u ==============\n", CurPage); 234 } 235 } 236 } 237 } 238 } 239 BC.outs() << "BOLT-INFO: Function reordering stats\n" 240 << format("BOLT-INFO: Number of hot functions: %u\n" 241 "BOLT-INFO: Number of clusters: %lu\n", 242 Hotfuncs, Clusters.size()) 243 << format("BOLT-INFO: Final average call distance = %.1lf " 244 "(%.0lf / %.0lf)\n", 245 TotalCalls ? TotalDistance / TotalCalls : 0, 246 TotalDistance, TotalCalls) 247 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); 248 if (TotalCalls) 249 BC.outs() 250 << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", 251 TotalCalls64B, 100 * TotalCalls64B / TotalCalls) 252 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", 253 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) 254 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", 255 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); 256 } 257 258 Error ReorderFunctions::readFunctionOrderFile( 259 std::vector<std::string> &FunctionNames) { 260 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); 261 if (!FuncsFile) 262 return createFatalBOLTError(Twine("Ordered functions file \"") + 263 Twine(opts::FunctionOrderFile) + 264 Twine("\" can't be opened.")); 265 266 std::string FuncName; 267 while (std::getline(FuncsFile, FuncName)) 268 FunctionNames.push_back(FuncName); 269 return Error::success(); 270 } 271 272 Error ReorderFunctions::runOnFunctions(BinaryContext &BC) { 273 auto &BFs = BC.getBinaryFunctions(); 274 if (opts::ReorderFunctions != RT_NONE && 275 opts::ReorderFunctions != RT_EXEC_COUNT && 276 opts::ReorderFunctions != RT_USER) { 277 Cg = buildCallGraph( 278 BC, 279 [](const BinaryFunction &BF) { 280 if (!BF.hasProfile()) 281 return true; 282 if (BF.getState() != BinaryFunction::State::CFG) 283 return true; 284 return false; 285 }, 286 opts::CgFromPerfData, 287 /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize, 288 opts::CgUseSplitHotSize, opts::UseEdgeCounts, 289 opts::CgIgnoreRecursiveCalls); 290 Cg.normalizeArcWeights(); 291 } 292 293 std::vector<Cluster> Clusters; 294 295 switch (opts::ReorderFunctions) { 296 case RT_NONE: 297 break; 298 case RT_EXEC_COUNT: { 299 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 300 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 301 [](BinaryFunction &BF) { return &BF; }); 302 llvm::stable_sort(SortedFunctions, 303 [&](const BinaryFunction *A, const BinaryFunction *B) { 304 if (A->isIgnored()) 305 return false; 306 if (B->isIgnored()) 307 return true; 308 const size_t PadA = opts::padFunctionBefore(*A) + 309 opts::padFunctionAfter(*A); 310 const size_t PadB = opts::padFunctionBefore(*B) + 311 opts::padFunctionAfter(*B); 312 if (!PadA || !PadB) { 313 if (PadA) 314 return true; 315 if (PadB) 316 return false; 317 } 318 if (!A->hasProfile()) 319 return false; 320 if (!B->hasProfile()) 321 return true; 322 return A->getExecutionCount() > B->getExecutionCount(); 323 }); 324 uint32_t Index = 0; 325 for (BinaryFunction *BF : SortedFunctions) 326 if (BF->hasProfile()) { 327 BF->setIndex(Index++); 328 LLVM_DEBUG(if (opts::Verbosity > 1) { 329 dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " (" 330 << BF->getExecutionCount() << ")\n"; 331 }); 332 } 333 } break; 334 case RT_HFSORT: 335 Clusters = clusterize(Cg); 336 break; 337 case RT_CDSORT: { 338 // It is required that the sum of incoming arc weights is not greater 339 // than the number of samples for every function. Ensuring the call graph 340 // obeys the property before running the algorithm. 341 Cg.adjustArcWeights(); 342 343 // Initialize CFG nodes and their data 344 std::vector<uint64_t> FuncSizes; 345 std::vector<uint64_t> FuncCounts; 346 std::vector<codelayout::EdgeCount> CallCounts; 347 std::vector<uint64_t> CallOffsets; 348 for (NodeId F = 0; F < Cg.numNodes(); ++F) { 349 FuncSizes.push_back(Cg.size(F)); 350 FuncCounts.push_back(Cg.samples(F)); 351 for (NodeId Succ : Cg.successors(F)) { 352 const Arc &Arc = *Cg.findArc(F, Succ); 353 CallCounts.push_back({F, Succ, uint64_t(Arc.weight())}); 354 CallOffsets.push_back(uint64_t(Arc.avgCallOffset())); 355 } 356 } 357 358 // Run the layout algorithm. 359 std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout( 360 FuncSizes, FuncCounts, CallCounts, CallOffsets); 361 362 // Create a single cluster from the computed order of hot functions. 363 std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end()); 364 Clusters.emplace_back(Cluster(NodeOrder, Cg)); 365 } break; 366 case RT_PETTIS_HANSEN: 367 Clusters = pettisAndHansen(Cg); 368 break; 369 case RT_RANDOM: 370 std::srand(opts::RandomSeed); 371 Clusters = randomClusters(Cg); 372 break; 373 case RT_USER: { 374 // Build LTOCommonNameMap 375 StringMap<std::vector<uint64_t>> LTOCommonNameMap; 376 for (const BinaryFunction &BF : llvm::make_second_range(BFs)) 377 for (StringRef Name : BF.getNames()) 378 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name)) 379 LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); 380 381 uint32_t Index = 0; 382 uint32_t InvalidEntries = 0; 383 std::vector<std::string> FunctionNames; 384 if (Error E = readFunctionOrderFile(FunctionNames)) 385 return Error(std::move(E)); 386 387 for (const std::string &Function : FunctionNames) { 388 std::vector<uint64_t> FuncAddrs; 389 390 BinaryData *BD = BC.getBinaryDataByName(Function); 391 if (!BD) { 392 // If we can't find the main symbol name, look for alternates. 393 uint32_t LocalID = 1; 394 while (true) { 395 const std::string FuncName = Function + "/" + std::to_string(LocalID); 396 BD = BC.getBinaryDataByName(FuncName); 397 if (BD) 398 FuncAddrs.push_back(BD->getAddress()); 399 else 400 break; 401 LocalID++; 402 } 403 // Strip LTO suffixes 404 if (std::optional<StringRef> CommonName = getLTOCommonName(Function)) 405 if (LTOCommonNameMap.contains(*CommonName)) 406 llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); 407 } else { 408 FuncAddrs.push_back(BD->getAddress()); 409 } 410 411 if (FuncAddrs.empty()) { 412 if (opts::Verbosity >= 1) 413 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " 414 << "for " << Function << "\n"; 415 ++InvalidEntries; 416 continue; 417 } 418 419 for (const uint64_t FuncAddr : FuncAddrs) { 420 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); 421 assert(FuncBD); 422 423 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); 424 if (!BF) { 425 if (opts::Verbosity >= 1) 426 BC.errs() << "BOLT-WARNING: Reorder functions: can't find function " 427 << "for " << Function << "\n"; 428 ++InvalidEntries; 429 break; 430 } 431 if (!BF->hasValidIndex()) 432 BF->setIndex(Index++); 433 else if (opts::Verbosity > 0) 434 BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function 435 << "\n"; 436 } 437 } 438 if (InvalidEntries) 439 BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for " 440 << InvalidEntries << " entries in -function-order list\n"; 441 } break; 442 443 default: 444 llvm_unreachable("unexpected layout type"); 445 } 446 447 reorder(BC, std::move(Clusters), BFs); 448 449 BC.HasFinalizedFunctionOrder = true; 450 451 std::unique_ptr<std::ofstream> FuncsFile; 452 if (!opts::GenerateFunctionOrderFile.empty()) { 453 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, 454 std::ios::out); 455 if (!FuncsFile) { 456 BC.errs() << "BOLT-ERROR: ordered functions file " 457 << opts::GenerateFunctionOrderFile << " cannot be opened\n"; 458 return createFatalBOLTError(""); 459 } 460 } 461 462 std::unique_ptr<std::ofstream> LinkSectionsFile; 463 if (!opts::LinkSectionsFile.empty()) { 464 LinkSectionsFile = 465 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out); 466 if (!LinkSectionsFile) { 467 BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile 468 << " cannot be opened\n"; 469 return createFatalBOLTError(""); 470 } 471 } 472 473 if (FuncsFile || LinkSectionsFile) { 474 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 475 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 476 [](BinaryFunction &BF) { return &BF; }); 477 478 // Sort functions by index. 479 llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex); 480 481 for (const BinaryFunction *Func : SortedFunctions) { 482 if (!Func->hasValidIndex()) 483 break; 484 if (Func->isPLTFunction()) 485 continue; 486 487 if (FuncsFile) 488 *FuncsFile << Func->getOneName().str() << '\n'; 489 490 if (LinkSectionsFile) { 491 const char *Indent = ""; 492 std::vector<StringRef> AllNames = Func->getNames(); 493 llvm::sort(AllNames); 494 for (StringRef Name : AllNames) { 495 const size_t SlashPos = Name.find('/'); 496 if (SlashPos != std::string::npos) { 497 // Avoid duplicates for local functions. 498 if (Name.find('/', SlashPos + 1) != std::string::npos) 499 continue; 500 Name = Name.substr(0, SlashPos); 501 } 502 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; 503 Indent = " "; 504 } 505 } 506 } 507 508 if (FuncsFile) { 509 FuncsFile->close(); 510 BC.outs() << "BOLT-INFO: dumped function order to " 511 << opts::GenerateFunctionOrderFile << '\n'; 512 } 513 514 if (LinkSectionsFile) { 515 LinkSectionsFile->close(); 516 BC.outs() << "BOLT-INFO: dumped linker section order to " 517 << opts::LinkSectionsFile << '\n'; 518 } 519 } 520 return Error::success(); 521 } 522 523 } // namespace bolt 524 } // namespace llvm 525