1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements ReorderFunctions class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ReorderFunctions.h" 14 #include "bolt/Passes/HFSort.h" 15 #include "bolt/Utils/Utils.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/CommandLine.h" 18 #include <fstream> 19 20 #define DEBUG_TYPE "hfsort" 21 22 using namespace llvm; 23 24 namespace opts { 25 26 extern cl::OptionCategory BoltOptCategory; 27 extern cl::opt<unsigned> Verbosity; 28 extern cl::opt<uint32_t> RandomSeed; 29 30 extern size_t padFunction(const bolt::BinaryFunction &Function); 31 32 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions( 33 "reorder-functions", 34 cl::desc("reorder and cluster functions (works only with relocations)"), 35 cl::init(bolt::ReorderFunctions::RT_NONE), 36 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none", 37 "do not reorder functions"), 38 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count", 39 "order by execution count"), 40 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort", 41 "use hfsort algorithm"), 42 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", 43 "use hfsort+ algorithm"), 44 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, 45 "pettis-hansen", "use Pettis-Hansen algorithm"), 46 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", 47 "reorder functions randomly"), 48 clEnumValN(bolt::ReorderFunctions::RT_USER, "user", 49 "use function order specified by -function-order")), 50 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 51 52 static cl::opt<bool> ReorderFunctionsUseHotSize( 53 "reorder-functions-use-hot-size", 54 cl::desc("use a function's hot size when doing clustering"), cl::init(true), 55 cl::cat(BoltOptCategory)); 56 57 static cl::opt<std::string> FunctionOrderFile( 58 "function-order", 59 cl::desc("file containing an ordered list of functions to use for function " 60 "reordering"), 61 cl::cat(BoltOptCategory)); 62 63 static cl::opt<std::string> GenerateFunctionOrderFile( 64 "generate-function-order", 65 cl::desc("file to dump the ordered list of functions to use for function " 66 "reordering"), 67 cl::cat(BoltOptCategory)); 68 69 static cl::opt<std::string> LinkSectionsFile( 70 "generate-link-sections", 71 cl::desc("generate a list of function sections in a format suitable for " 72 "inclusion in a linker script"), 73 cl::cat(BoltOptCategory)); 74 75 static cl::opt<bool> 76 UseEdgeCounts("use-edge-counts", 77 cl::desc("use edge count data when doing clustering"), 78 cl::init(true), cl::cat(BoltOptCategory)); 79 80 static cl::opt<bool> CgFromPerfData( 81 "cg-from-perf-data", 82 cl::desc("use perf data directly when constructing the call graph" 83 " for stale functions"), 84 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 85 86 static cl::opt<bool> CgIgnoreRecursiveCalls( 87 "cg-ignore-recursive-calls", 88 cl::desc("ignore recursive calls when constructing the call graph"), 89 cl::init(true), cl::cat(BoltOptCategory)); 90 91 static cl::opt<bool> CgUseSplitHotSize( 92 "cg-use-split-hot-size", 93 cl::desc("use hot/cold data on basic blocks to determine hot sizes for " 94 "call graph functions"), 95 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 96 97 } // namespace opts 98 99 namespace llvm { 100 namespace bolt { 101 102 using NodeId = CallGraph::NodeId; 103 using Arc = CallGraph::Arc; 104 using Node = CallGraph::Node; 105 106 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters, 107 std::map<uint64_t, BinaryFunction> &BFs) { 108 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats 109 uint64_t TotalSize = 0; 110 uint32_t Index = 0; 111 112 // Set order of hot functions based on clusters. 113 for (const Cluster &Cluster : Clusters) { 114 for (const NodeId FuncId : Cluster.targets()) { 115 Cg.nodeIdToFunc(FuncId)->setIndex(Index++); 116 FuncAddr[FuncId] = TotalSize; 117 TotalSize += Cg.size(FuncId); 118 } 119 } 120 121 // Assign valid index for functions with valid profile. 122 for (auto &It : BFs) { 123 BinaryFunction &BF = It.second; 124 if (!BF.hasValidIndex() && BF.hasValidProfile()) 125 BF.setIndex(Index++); 126 } 127 128 if (opts::ReorderFunctions == RT_NONE) 129 return; 130 131 printStats(Clusters, FuncAddr); 132 } 133 134 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters, 135 const std::vector<uint64_t> &FuncAddr) { 136 if (opts::Verbosity == 0) { 137 #ifndef NDEBUG 138 if (!DebugFlag || !isCurrentDebugType("hfsort")) 139 return; 140 #else 141 return; 142 #endif 143 } 144 145 bool PrintDetailed = opts::Verbosity > 1; 146 #ifndef NDEBUG 147 PrintDetailed |= 148 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); 149 #endif 150 uint64_t TotalSize = 0; 151 uint64_t CurPage = 0; 152 uint64_t Hotfuncs = 0; 153 double TotalDistance = 0; 154 double TotalCalls = 0; 155 double TotalCalls64B = 0; 156 double TotalCalls4KB = 0; 157 double TotalCalls2MB = 0; 158 if (PrintDetailed) 159 outs() << "BOLT-INFO: Function reordering page layout\n" 160 << "BOLT-INFO: ============== page 0 ==============\n"; 161 for (const Cluster &Cluster : Clusters) { 162 if (PrintDetailed) 163 outs() << format( 164 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", 165 Cluster.density(), Cluster.samples(), Cluster.size()); 166 167 for (NodeId FuncId : Cluster.targets()) { 168 if (Cg.samples(FuncId) > 0) { 169 Hotfuncs++; 170 171 if (PrintDetailed) 172 outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " (" 173 << Cg.size(FuncId) << ")\n"; 174 175 uint64_t Dist = 0; 176 uint64_t Calls = 0; 177 for (NodeId Dst : Cg.successors(FuncId)) { 178 if (FuncId == Dst) // ignore recursive calls in stats 179 continue; 180 const Arc &Arc = *Cg.findArc(FuncId, Dst); 181 const auto D = std::abs(FuncAddr[Arc.dst()] - 182 (FuncAddr[FuncId] + Arc.avgCallOffset())); 183 const double W = Arc.weight(); 184 if (D < 64 && PrintDetailed && opts::Verbosity > 2) 185 outs() << "BOLT-INFO: short (" << D << "B) call:\n" 186 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n" 187 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" 188 << "BOLT-INFO: Weight = " << W << "\n" 189 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() 190 << "\n"; 191 Calls += W; 192 if (D < 64) 193 TotalCalls64B += W; 194 if (D < 4096) 195 TotalCalls4KB += W; 196 if (D < (2 << 20)) 197 TotalCalls2MB += W; 198 Dist += Arc.weight() * D; 199 if (PrintDetailed) 200 outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " 201 "weight = %.0lf, callDist = %f\n", 202 Arc.src(), FuncAddr[Arc.src()], 203 Arc.avgCallOffset(), Arc.dst(), 204 FuncAddr[Arc.dst()], Arc.weight(), D); 205 } 206 TotalCalls += Calls; 207 TotalDistance += Dist; 208 TotalSize += Cg.size(FuncId); 209 210 if (PrintDetailed) { 211 outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", 212 TotalSize, Calls ? Dist / Calls : 0) 213 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; 214 const uint64_t NewPage = TotalSize / HugePageSize; 215 if (NewPage != CurPage) { 216 CurPage = NewPage; 217 outs() << format( 218 "BOLT-INFO: ============== page %u ==============\n", CurPage); 219 } 220 } 221 } 222 } 223 } 224 outs() << "BOLT-INFO: Function reordering stats\n" 225 << format("BOLT-INFO: Number of hot functions: %u\n" 226 "BOLT-INFO: Number of clusters: %lu\n", 227 Hotfuncs, Clusters.size()) 228 << format("BOLT-INFO: Final average call distance = %.1lf " 229 "(%.0lf / %.0lf)\n", 230 TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance, 231 TotalCalls) 232 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); 233 if (TotalCalls) 234 outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", 235 TotalCalls64B, 100 * TotalCalls64B / TotalCalls) 236 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", 237 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) 238 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", 239 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); 240 } 241 242 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() { 243 std::vector<std::string> FunctionNames; 244 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); 245 if (!FuncsFile) { 246 errs() << "Ordered functions file \"" << opts::FunctionOrderFile 247 << "\" can't be opened.\n"; 248 exit(1); 249 } 250 std::string FuncName; 251 while (std::getline(FuncsFile, FuncName)) 252 FunctionNames.push_back(FuncName); 253 return FunctionNames; 254 } 255 256 void ReorderFunctions::runOnFunctions(BinaryContext &BC) { 257 auto &BFs = BC.getBinaryFunctions(); 258 if (opts::ReorderFunctions != RT_NONE && 259 opts::ReorderFunctions != RT_EXEC_COUNT && 260 opts::ReorderFunctions != RT_USER) { 261 Cg = buildCallGraph( 262 BC, 263 [](const BinaryFunction &BF) { 264 if (!BF.hasProfile()) 265 return true; 266 if (BF.getState() != BinaryFunction::State::CFG) 267 return true; 268 return false; 269 }, 270 opts::CgFromPerfData, 271 /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize, 272 opts::CgUseSplitHotSize, opts::UseEdgeCounts, 273 opts::CgIgnoreRecursiveCalls); 274 Cg.normalizeArcWeights(); 275 } 276 277 std::vector<Cluster> Clusters; 278 279 switch (opts::ReorderFunctions) { 280 case RT_NONE: 281 break; 282 case RT_EXEC_COUNT: { 283 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 284 uint32_t Index = 0; 285 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 286 [](BinaryFunction &BF) { return &BF; }); 287 llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A, 288 const BinaryFunction *B) { 289 if (A->isIgnored()) 290 return false; 291 const size_t PadA = opts::padFunction(*A); 292 const size_t PadB = opts::padFunction(*B); 293 if (!PadA || !PadB) { 294 if (PadA) 295 return true; 296 if (PadB) 297 return false; 298 } 299 return !A->hasProfile() && (B->hasProfile() || (A->getExecutionCount() > 300 B->getExecutionCount())); 301 }); 302 for (BinaryFunction *BF : SortedFunctions) 303 if (BF->hasProfile()) 304 BF->setIndex(Index++); 305 } break; 306 case RT_HFSORT: 307 Clusters = clusterize(Cg); 308 break; 309 case RT_HFSORT_PLUS: 310 Clusters = hfsortPlus(Cg); 311 break; 312 case RT_PETTIS_HANSEN: 313 Clusters = pettisAndHansen(Cg); 314 break; 315 case RT_RANDOM: 316 std::srand(opts::RandomSeed); 317 Clusters = randomClusters(Cg); 318 break; 319 case RT_USER: { 320 // Build LTOCommonNameMap 321 StringMap<std::vector<uint64_t>> LTOCommonNameMap; 322 for (const BinaryFunction &BF : llvm::make_second_range(BFs)) 323 for (StringRef Name : BF.getNames()) 324 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name)) 325 LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); 326 327 uint32_t Index = 0; 328 uint32_t InvalidEntries = 0; 329 for (const std::string &Function : readFunctionOrderFile()) { 330 std::vector<uint64_t> FuncAddrs; 331 332 BinaryData *BD = BC.getBinaryDataByName(Function); 333 if (!BD) { 334 // If we can't find the main symbol name, look for alternates. 335 uint32_t LocalID = 1; 336 while (true) { 337 const std::string FuncName = Function + "/" + std::to_string(LocalID); 338 BD = BC.getBinaryDataByName(FuncName); 339 if (BD) 340 FuncAddrs.push_back(BD->getAddress()); 341 else 342 break; 343 LocalID++; 344 } 345 // Strip LTO suffixes 346 if (std::optional<StringRef> CommonName = getLTOCommonName(Function)) 347 if (LTOCommonNameMap.contains(*CommonName)) 348 llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); 349 } else { 350 FuncAddrs.push_back(BD->getAddress()); 351 } 352 353 if (FuncAddrs.empty()) { 354 if (opts::Verbosity >= 1) 355 errs() << "BOLT-WARNING: Reorder functions: can't find function " 356 << "for " << Function << "\n"; 357 ++InvalidEntries; 358 continue; 359 } 360 361 for (const uint64_t FuncAddr : FuncAddrs) { 362 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); 363 assert(FuncBD); 364 365 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); 366 if (!BF) { 367 if (opts::Verbosity >= 1) 368 errs() << "BOLT-WARNING: Reorder functions: can't find function " 369 << "for " << Function << "\n"; 370 ++InvalidEntries; 371 break; 372 } 373 if (!BF->hasValidIndex()) 374 BF->setIndex(Index++); 375 else if (opts::Verbosity > 0) 376 errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function 377 << "\n"; 378 } 379 } 380 if (InvalidEntries) 381 errs() << "BOLT-WARNING: Reorder functions: can't find functions for " 382 << InvalidEntries << " entries in -function-order list\n"; 383 } break; 384 } 385 386 reorder(std::move(Clusters), BFs); 387 388 std::unique_ptr<std::ofstream> FuncsFile; 389 if (!opts::GenerateFunctionOrderFile.empty()) { 390 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, 391 std::ios::out); 392 if (!FuncsFile) { 393 errs() << "BOLT-ERROR: ordered functions file " 394 << opts::GenerateFunctionOrderFile << " cannot be opened\n"; 395 exit(1); 396 } 397 } 398 399 std::unique_ptr<std::ofstream> LinkSectionsFile; 400 if (!opts::LinkSectionsFile.empty()) { 401 LinkSectionsFile = 402 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out); 403 if (!LinkSectionsFile) { 404 errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile 405 << " cannot be opened\n"; 406 exit(1); 407 } 408 } 409 410 if (FuncsFile || LinkSectionsFile) { 411 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 412 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), 413 [](BinaryFunction &BF) { return &BF; }); 414 415 // Sort functions by index. 416 llvm::stable_sort(SortedFunctions, 417 [](const BinaryFunction *A, const BinaryFunction *B) { 418 if (A->hasValidIndex() && B->hasValidIndex()) 419 return A->getIndex() < B->getIndex(); 420 if (A->hasValidIndex() && !B->hasValidIndex()) 421 return true; 422 if (!A->hasValidIndex() && B->hasValidIndex()) 423 return false; 424 return A->getAddress() < B->getAddress(); 425 }); 426 427 for (const BinaryFunction *Func : SortedFunctions) { 428 if (!Func->hasValidIndex()) 429 break; 430 if (Func->isPLTFunction()) 431 continue; 432 433 if (FuncsFile) 434 *FuncsFile << Func->getOneName().str() << '\n'; 435 436 if (LinkSectionsFile) { 437 const char *Indent = ""; 438 std::vector<StringRef> AllNames = Func->getNames(); 439 llvm::sort(AllNames); 440 for (StringRef Name : AllNames) { 441 const size_t SlashPos = Name.find('/'); 442 if (SlashPos != std::string::npos) { 443 // Avoid duplicates for local functions. 444 if (Name.find('/', SlashPos + 1) != std::string::npos) 445 continue; 446 Name = Name.substr(0, SlashPos); 447 } 448 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; 449 Indent = " "; 450 } 451 } 452 } 453 454 if (FuncsFile) { 455 FuncsFile->close(); 456 outs() << "BOLT-INFO: dumped function order to " 457 << opts::GenerateFunctionOrderFile << '\n'; 458 } 459 460 if (LinkSectionsFile) { 461 LinkSectionsFile->close(); 462 outs() << "BOLT-INFO: dumped linker section order to " 463 << opts::LinkSectionsFile << '\n'; 464 } 465 } 466 } 467 468 } // namespace bolt 469 } // namespace llvm 470