1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements ReorderFunctions class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ReorderFunctions.h" 14 #include "bolt/Passes/HFSort.h" 15 #include "bolt/Utils/Utils.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/CommandLine.h" 18 #include <fstream> 19 20 #define DEBUG_TYPE "hfsort" 21 22 using namespace llvm; 23 24 namespace opts { 25 26 extern cl::OptionCategory BoltOptCategory; 27 extern cl::opt<unsigned> Verbosity; 28 extern cl::opt<uint32_t> RandomSeed; 29 30 extern size_t padFunction(const bolt::BinaryFunction &Function); 31 32 cl::opt<bolt::ReorderFunctions::ReorderType> 33 ReorderFunctions("reorder-functions", 34 cl::desc("reorder and cluster functions (works only with relocations)"), 35 cl::init(bolt::ReorderFunctions::RT_NONE), 36 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, 37 "none", 38 "do not reorder functions"), 39 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, 40 "exec-count", 41 "order by execution count"), 42 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, 43 "hfsort", 44 "use hfsort algorithm"), 45 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, 46 "hfsort+", 47 "use hfsort+ algorithm"), 48 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, 49 "pettis-hansen", 50 "use Pettis-Hansen algorithm"), 51 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, 52 "random", 53 "reorder functions randomly"), 54 clEnumValN(bolt::ReorderFunctions::RT_USER, 55 "user", 56 "use function order specified by -function-order")), 57 cl::ZeroOrMore, 58 cl::cat(BoltOptCategory)); 59 60 static cl::opt<bool> ReorderFunctionsUseHotSize( 61 "reorder-functions-use-hot-size", 62 cl::desc("use a function's hot size when doing clustering"), cl::init(true), 63 cl::cat(BoltOptCategory)); 64 65 static cl::opt<std::string> 66 FunctionOrderFile("function-order", 67 cl::desc("file containing an ordered list of functions to use for function " 68 "reordering"), 69 cl::cat(BoltOptCategory)); 70 71 static cl::opt<std::string> 72 GenerateFunctionOrderFile("generate-function-order", 73 cl::desc("file to dump the ordered list of functions to use for function " 74 "reordering"), 75 cl::cat(BoltOptCategory)); 76 77 static cl::opt<std::string> 78 LinkSectionsFile("generate-link-sections", 79 cl::desc("generate a list of function sections in a format suitable for " 80 "inclusion in a linker script"), 81 cl::cat(BoltOptCategory)); 82 83 static cl::opt<bool> 84 UseEdgeCounts("use-edge-counts", 85 cl::desc("use edge count data when doing clustering"), 86 cl::init(true), cl::cat(BoltOptCategory)); 87 88 static cl::opt<bool> 89 CgFromPerfData("cg-from-perf-data", 90 cl::desc("use perf data directly when constructing the call graph" 91 " for stale functions"), 92 cl::init(true), 93 cl::ZeroOrMore, 94 cl::cat(BoltOptCategory)); 95 96 static cl::opt<bool> CgIgnoreRecursiveCalls( 97 "cg-ignore-recursive-calls", 98 cl::desc("ignore recursive calls when constructing the call graph"), 99 cl::init(true), cl::cat(BoltOptCategory)); 100 101 static cl::opt<bool> 102 CgUseSplitHotSize("cg-use-split-hot-size", 103 cl::desc("use hot/cold data on basic blocks to determine hot sizes for " 104 "call graph functions"), 105 cl::init(false), 106 cl::ZeroOrMore, 107 cl::cat(BoltOptCategory)); 108 109 } // namespace opts 110 111 namespace llvm { 112 namespace bolt { 113 114 using NodeId = CallGraph::NodeId; 115 using Arc = CallGraph::Arc; 116 using Node = CallGraph::Node; 117 118 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters, 119 std::map<uint64_t, BinaryFunction> &BFs) { 120 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats 121 uint64_t TotalSize = 0; 122 uint32_t Index = 0; 123 124 // Set order of hot functions based on clusters. 125 for (const Cluster &Cluster : Clusters) { 126 for (const NodeId FuncId : Cluster.targets()) { 127 Cg.nodeIdToFunc(FuncId)->setIndex(Index++); 128 FuncAddr[FuncId] = TotalSize; 129 TotalSize += Cg.size(FuncId); 130 } 131 } 132 133 // Assign valid index for functions with valid profile. 134 for (auto &It : BFs) { 135 BinaryFunction &BF = It.second; 136 if (!BF.hasValidIndex() && BF.hasValidProfile()) 137 BF.setIndex(Index++); 138 } 139 140 if (opts::ReorderFunctions == RT_NONE) 141 return; 142 143 printStats(Clusters, FuncAddr); 144 } 145 146 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters, 147 const std::vector<uint64_t> &FuncAddr) { 148 if (opts::Verbosity == 0) { 149 #ifndef NDEBUG 150 if (!DebugFlag || !isCurrentDebugType("hfsort")) 151 return; 152 #else 153 return; 154 #endif 155 } 156 157 bool PrintDetailed = opts::Verbosity > 1; 158 #ifndef NDEBUG 159 PrintDetailed |= 160 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); 161 #endif 162 uint64_t TotalSize = 0; 163 uint64_t CurPage = 0; 164 uint64_t Hotfuncs = 0; 165 double TotalDistance = 0; 166 double TotalCalls = 0; 167 double TotalCalls64B = 0; 168 double TotalCalls4KB = 0; 169 double TotalCalls2MB = 0; 170 if (PrintDetailed) 171 outs() << "BOLT-INFO: Function reordering page layout\n" 172 << "BOLT-INFO: ============== page 0 ==============\n"; 173 for (const Cluster &Cluster : Clusters) { 174 if (PrintDetailed) 175 outs() << format( 176 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", 177 Cluster.density(), Cluster.samples(), Cluster.size()); 178 179 for (NodeId FuncId : Cluster.targets()) { 180 if (Cg.samples(FuncId) > 0) { 181 Hotfuncs++; 182 183 if (PrintDetailed) 184 outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " (" 185 << Cg.size(FuncId) << ")\n"; 186 187 uint64_t Dist = 0; 188 uint64_t Calls = 0; 189 for (NodeId Dst : Cg.successors(FuncId)) { 190 if (FuncId == Dst) // ignore recursive calls in stats 191 continue; 192 const Arc &Arc = *Cg.findArc(FuncId, Dst); 193 const auto D = std::abs(FuncAddr[Arc.dst()] - 194 (FuncAddr[FuncId] + Arc.avgCallOffset())); 195 const double W = Arc.weight(); 196 if (D < 64 && PrintDetailed && opts::Verbosity > 2) 197 outs() << "BOLT-INFO: short (" << D << "B) call:\n" 198 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n" 199 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" 200 << "BOLT-INFO: Weight = " << W << "\n" 201 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() << "\n"; 202 Calls += W; 203 if (D < 64) TotalCalls64B += W; 204 if (D < 4096) TotalCalls4KB += W; 205 if (D < (2 << 20)) TotalCalls2MB += W; 206 Dist += Arc.weight() * D; 207 if (PrintDetailed) 208 outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " 209 "weight = %.0lf, callDist = %f\n", 210 Arc.src(), 211 FuncAddr[Arc.src()], 212 Arc.avgCallOffset(), 213 Arc.dst(), 214 FuncAddr[Arc.dst()], 215 Arc.weight(), D); 216 } 217 TotalCalls += Calls; 218 TotalDistance += Dist; 219 TotalSize += Cg.size(FuncId); 220 221 if (PrintDetailed) { 222 outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", 223 TotalSize, Calls ? Dist / Calls : 0) 224 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; 225 const uint64_t NewPage = TotalSize / HugePageSize; 226 if (NewPage != CurPage) { 227 CurPage = NewPage; 228 outs() << format( 229 "BOLT-INFO: ============== page %u ==============\n", CurPage); 230 } 231 } 232 } 233 } 234 } 235 outs() << "BOLT-INFO: Function reordering stats\n" 236 << format("BOLT-INFO: Number of hot functions: %u\n" 237 "BOLT-INFO: Number of clusters: %lu\n", 238 Hotfuncs, Clusters.size()) 239 << format("BOLT-INFO: Final average call distance = %.1lf " 240 "(%.0lf / %.0lf)\n", 241 TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance, 242 TotalCalls) 243 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); 244 if (TotalCalls) 245 outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", 246 TotalCalls64B, 100 * TotalCalls64B / TotalCalls) 247 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", 248 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) 249 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", 250 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); 251 } 252 253 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() { 254 std::vector<std::string> FunctionNames; 255 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); 256 if (!FuncsFile) { 257 errs() << "Ordered functions file \"" << opts::FunctionOrderFile 258 << "\" can't be opened.\n"; 259 exit(1); 260 } 261 std::string FuncName; 262 while (std::getline(FuncsFile, FuncName)) 263 FunctionNames.push_back(FuncName); 264 return FunctionNames; 265 } 266 267 void ReorderFunctions::runOnFunctions(BinaryContext &BC) { 268 auto &BFs = BC.getBinaryFunctions(); 269 if (opts::ReorderFunctions != RT_NONE && 270 opts::ReorderFunctions != RT_EXEC_COUNT && 271 opts::ReorderFunctions != RT_USER) { 272 Cg = buildCallGraph( 273 BC, 274 [](const BinaryFunction &BF) { 275 if (!BF.hasProfile()) 276 return true; 277 if (BF.getState() != BinaryFunction::State::CFG) 278 return true; 279 return false; 280 }, 281 opts::CgFromPerfData, 282 /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize, 283 opts::CgUseSplitHotSize, opts::UseEdgeCounts, 284 opts::CgIgnoreRecursiveCalls); 285 Cg.normalizeArcWeights(); 286 } 287 288 std::vector<Cluster> Clusters; 289 290 switch (opts::ReorderFunctions) { 291 case RT_NONE: 292 break; 293 case RT_EXEC_COUNT: 294 { 295 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 296 uint32_t Index = 0; 297 llvm::transform(BFs, SortedFunctions.begin(), 298 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 299 return &BFI.second; 300 }); 301 llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A, 302 const BinaryFunction *B) { 303 if (A->isIgnored()) 304 return false; 305 const size_t PadA = opts::padFunction(*A); 306 const size_t PadB = opts::padFunction(*B); 307 if (!PadA || !PadB) { 308 if (PadA) 309 return true; 310 if (PadB) 311 return false; 312 } 313 return !A->hasProfile() && 314 (B->hasProfile() || 315 (A->getExecutionCount() > B->getExecutionCount())); 316 }); 317 for (BinaryFunction *BF : SortedFunctions) 318 if (BF->hasProfile()) 319 BF->setIndex(Index++); 320 } 321 break; 322 case RT_HFSORT: 323 Clusters = clusterize(Cg); 324 break; 325 case RT_HFSORT_PLUS: 326 Clusters = hfsortPlus(Cg); 327 break; 328 case RT_PETTIS_HANSEN: 329 Clusters = pettisAndHansen(Cg); 330 break; 331 case RT_RANDOM: 332 std::srand(opts::RandomSeed); 333 Clusters = randomClusters(Cg); 334 break; 335 case RT_USER: 336 { 337 // Build LTOCommonNameMap 338 StringMap<std::vector<uint64_t>> LTOCommonNameMap; 339 for (const BinaryFunction &BF : llvm::make_second_range(BFs)) 340 for (StringRef Name : BF.getNames()) 341 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name)) 342 LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); 343 344 uint32_t Index = 0; 345 uint32_t InvalidEntries = 0; 346 for (const std::string &Function : readFunctionOrderFile()) { 347 std::vector<uint64_t> FuncAddrs; 348 349 BinaryData *BD = BC.getBinaryDataByName(Function); 350 if (!BD) { 351 // If we can't find the main symbol name, look for alternates. 352 uint32_t LocalID = 1; 353 while (true) { 354 const std::string FuncName = 355 Function + "/" + std::to_string(LocalID); 356 BD = BC.getBinaryDataByName(FuncName); 357 if (BD) 358 FuncAddrs.push_back(BD->getAddress()); 359 else 360 break; 361 LocalID++; 362 } 363 // Strip LTO suffixes 364 if (std::optional<StringRef> CommonName = getLTOCommonName(Function)) 365 if (LTOCommonNameMap.find(*CommonName) != LTOCommonNameMap.end()) 366 llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); 367 } else { 368 FuncAddrs.push_back(BD->getAddress()); 369 } 370 371 if (FuncAddrs.empty()) { 372 if (opts::Verbosity >= 1) 373 errs() << "BOLT-WARNING: Reorder functions: can't find function " 374 << "for " << Function << "\n"; 375 ++InvalidEntries; 376 continue; 377 } 378 379 for (const uint64_t FuncAddr : FuncAddrs) { 380 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); 381 assert(FuncBD); 382 383 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); 384 if (!BF) { 385 if (opts::Verbosity >= 1) 386 errs() << "BOLT-WARNING: Reorder functions: can't find function " 387 << "for " << Function << "\n"; 388 ++InvalidEntries; 389 break; 390 } 391 if (!BF->hasValidIndex()) 392 BF->setIndex(Index++); 393 else if (opts::Verbosity > 0) 394 errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function 395 << "\n"; 396 } 397 } 398 if (InvalidEntries) 399 errs() << "BOLT-WARNING: Reorder functions: can't find functions for " 400 << InvalidEntries << " entries in -function-order list\n"; 401 } 402 break; 403 } 404 405 reorder(std::move(Clusters), BFs); 406 407 std::unique_ptr<std::ofstream> FuncsFile; 408 if (!opts::GenerateFunctionOrderFile.empty()) { 409 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, 410 std::ios::out); 411 if (!FuncsFile) { 412 errs() << "BOLT-ERROR: ordered functions file " 413 << opts::GenerateFunctionOrderFile << " cannot be opened\n"; 414 exit(1); 415 } 416 } 417 418 std::unique_ptr<std::ofstream> LinkSectionsFile; 419 if (!opts::LinkSectionsFile.empty()) { 420 LinkSectionsFile = 421 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out); 422 if (!LinkSectionsFile) { 423 errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile 424 << " cannot be opened\n"; 425 exit(1); 426 } 427 } 428 429 if (FuncsFile || LinkSectionsFile) { 430 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 431 llvm::transform(BFs, SortedFunctions.begin(), 432 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 433 return &BFI.second; 434 }); 435 436 // Sort functions by index. 437 llvm::stable_sort(SortedFunctions, 438 [](const BinaryFunction *A, const BinaryFunction *B) { 439 if (A->hasValidIndex() && B->hasValidIndex()) 440 return A->getIndex() < B->getIndex(); 441 if (A->hasValidIndex() && !B->hasValidIndex()) 442 return true; 443 if (!A->hasValidIndex() && B->hasValidIndex()) 444 return false; 445 return A->getAddress() < B->getAddress(); 446 }); 447 448 for (const BinaryFunction *Func : SortedFunctions) { 449 if (!Func->hasValidIndex()) 450 break; 451 if (Func->isPLTFunction()) 452 continue; 453 454 if (FuncsFile) 455 *FuncsFile << Func->getOneName().str() << '\n'; 456 457 if (LinkSectionsFile) { 458 const char *Indent = ""; 459 std::vector<StringRef> AllNames = Func->getNames(); 460 llvm::sort(AllNames); 461 for (StringRef Name : AllNames) { 462 const size_t SlashPos = Name.find('/'); 463 if (SlashPos != std::string::npos) { 464 // Avoid duplicates for local functions. 465 if (Name.find('/', SlashPos + 1) != std::string::npos) 466 continue; 467 Name = Name.substr(0, SlashPos); 468 } 469 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; 470 Indent = " "; 471 } 472 } 473 } 474 475 if (FuncsFile) { 476 FuncsFile->close(); 477 outs() << "BOLT-INFO: dumped function order to " 478 << opts::GenerateFunctionOrderFile << '\n'; 479 } 480 481 if (LinkSectionsFile) { 482 LinkSectionsFile->close(); 483 outs() << "BOLT-INFO: dumped linker section order to " 484 << opts::LinkSectionsFile << '\n'; 485 } 486 } 487 } 488 489 } // namespace bolt 490 } // namespace llvm 491