xref: /llvm-project/bolt/lib/Passes/ReorderFunctions.cpp (revision aa9cc721e58f086ba6a3f9711fefdb61a184f786)
1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Transforms/Utils/CodeLayout.h"
19 #include <fstream>
20 
21 #define DEBUG_TYPE "hfsort"
22 
23 using namespace llvm;
24 
25 namespace opts {
26 
27 extern cl::OptionCategory BoltOptCategory;
28 extern cl::opt<unsigned> Verbosity;
29 extern cl::opt<uint32_t> RandomSeed;
30 
31 extern size_t padFunctionBefore(const bolt::BinaryFunction &Function);
32 extern size_t padFunctionAfter(const bolt::BinaryFunction &Function);
33 
34 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
35 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
36     "reorder-functions",
37     cl::desc("reorder and cluster functions (works only with relocations)"),
38     cl::init(bolt::ReorderFunctions::RT_NONE),
39     cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
40                           "do not reorder functions"),
41                clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
42                           "order by execution count"),
43                clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
44                           "use hfsort algorithm"),
45                clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
46                           "use cache-directed sort"),
47                clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort",
48                           "use cache-directed sort"),
49                clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
50                           "pettis-hansen", "use Pettis-Hansen algorithm"),
51                clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
52                           "reorder functions randomly"),
53                clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
54                           "use function order specified by -function-order")),
55     cl::ZeroOrMore, cl::cat(BoltOptCategory),
56     cl::callback([](const bolt::ReorderFunctions::ReorderType &option) {
57       if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) {
58         errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
59                << " please use '-reorder-functions=cdsort' instead\n";
60         ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT;
61       }
62     }));
63 
64 static cl::opt<bool> ReorderFunctionsUseHotSize(
65     "reorder-functions-use-hot-size",
66     cl::desc("use a function's hot size when doing clustering"), cl::init(true),
67     cl::cat(BoltOptCategory));
68 
69 static cl::opt<std::string> FunctionOrderFile(
70     "function-order",
71     cl::desc("file containing an ordered list of functions to use for function "
72              "reordering"),
73     cl::cat(BoltOptCategory));
74 
75 static cl::opt<std::string> GenerateFunctionOrderFile(
76     "generate-function-order",
77     cl::desc("file to dump the ordered list of functions to use for function "
78              "reordering"),
79     cl::cat(BoltOptCategory));
80 
81 static cl::opt<std::string> LinkSectionsFile(
82     "generate-link-sections",
83     cl::desc("generate a list of function sections in a format suitable for "
84              "inclusion in a linker script"),
85     cl::cat(BoltOptCategory));
86 
87 static cl::opt<bool>
88     UseEdgeCounts("use-edge-counts",
89                   cl::desc("use edge count data when doing clustering"),
90                   cl::init(true), cl::cat(BoltOptCategory));
91 
92 static cl::opt<bool> CgFromPerfData(
93     "cg-from-perf-data",
94     cl::desc("use perf data directly when constructing the call graph"
95              " for stale functions"),
96     cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
97 
98 static cl::opt<bool> CgIgnoreRecursiveCalls(
99     "cg-ignore-recursive-calls",
100     cl::desc("ignore recursive calls when constructing the call graph"),
101     cl::init(true), cl::cat(BoltOptCategory));
102 
103 static cl::opt<bool> CgUseSplitHotSize(
104     "cg-use-split-hot-size",
105     cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
106              "call graph functions"),
107     cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
108 
109 } // namespace opts
110 
111 namespace llvm {
112 namespace bolt {
113 
114 using NodeId = CallGraph::NodeId;
115 using Arc = CallGraph::Arc;
116 using Node = CallGraph::Node;
117 
118 void ReorderFunctions::reorder(BinaryContext &BC,
119                                std::vector<Cluster> &&Clusters,
120                                std::map<uint64_t, BinaryFunction> &BFs) {
121   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
122   uint64_t TotalSize = 0;
123   uint32_t Index = 0;
124 
125   // Set order of hot functions based on clusters.
126   for (const Cluster &Cluster : Clusters) {
127     for (const NodeId FuncId : Cluster.targets()) {
128       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
129       FuncAddr[FuncId] = TotalSize;
130       TotalSize += Cg.size(FuncId);
131     }
132   }
133 
134   // Assign valid index for functions with valid profile.
135   for (auto &It : BFs) {
136     BinaryFunction &BF = It.second;
137     if (!BF.hasValidIndex() && BF.hasValidProfile())
138       BF.setIndex(Index++);
139   }
140 
141   if (opts::ReorderFunctions == RT_NONE)
142     return;
143 
144   printStats(BC, Clusters, FuncAddr);
145 }
146 
147 void ReorderFunctions::printStats(BinaryContext &BC,
148                                   const std::vector<Cluster> &Clusters,
149                                   const std::vector<uint64_t> &FuncAddr) {
150   if (opts::Verbosity == 0) {
151 #ifndef NDEBUG
152     if (!DebugFlag || !isCurrentDebugType("hfsort"))
153       return;
154 #else
155     return;
156 #endif
157   }
158 
159   bool PrintDetailed = opts::Verbosity > 1;
160 #ifndef NDEBUG
161   PrintDetailed |=
162       (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
163 #endif
164   uint64_t TotalSize = 0;
165   uint64_t CurPage = 0;
166   uint64_t Hotfuncs = 0;
167   double TotalDistance = 0;
168   double TotalCalls = 0;
169   double TotalCalls64B = 0;
170   double TotalCalls4KB = 0;
171   double TotalCalls2MB = 0;
172   if (PrintDetailed)
173     BC.outs() << "BOLT-INFO: Function reordering page layout\n"
174               << "BOLT-INFO: ============== page 0 ==============\n";
175   for (const Cluster &Cluster : Clusters) {
176     if (PrintDetailed)
177       BC.outs() << format(
178           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
179           Cluster.density(), Cluster.samples(), Cluster.size());
180 
181     for (NodeId FuncId : Cluster.targets()) {
182       if (Cg.samples(FuncId) > 0) {
183         Hotfuncs++;
184 
185         if (PrintDetailed)
186           BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId)
187                     << " (" << Cg.size(FuncId) << ")\n";
188 
189         uint64_t Dist = 0;
190         uint64_t Calls = 0;
191         for (NodeId Dst : Cg.successors(FuncId)) {
192           if (FuncId == Dst) // ignore recursive calls in stats
193             continue;
194           const Arc &Arc = *Cg.findArc(FuncId, Dst);
195           const auto D = std::abs(FuncAddr[Arc.dst()] -
196                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
197           const double W = Arc.weight();
198           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
199             BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n"
200                       << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId)
201                       << "\n"
202                       << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
203                       << "BOLT-INFO:   Weight = " << W << "\n"
204                       << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset()
205                       << "\n";
206           Calls += W;
207           if (D < 64)
208             TotalCalls64B += W;
209           if (D < 4096)
210             TotalCalls4KB += W;
211           if (D < (2 << 20))
212             TotalCalls2MB += W;
213           Dist += Arc.weight() * D;
214           if (PrintDetailed)
215             BC.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
216                                 "weight = %.0lf, callDist = %f\n",
217                                 Arc.src(), FuncAddr[Arc.src()],
218                                 Arc.avgCallOffset(), Arc.dst(),
219                                 FuncAddr[Arc.dst()], Arc.weight(), D);
220         }
221         TotalCalls += Calls;
222         TotalDistance += Dist;
223         TotalSize += Cg.size(FuncId);
224 
225         if (PrintDetailed) {
226           BC.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
227                               TotalSize, Calls ? Dist / Calls : 0)
228                     << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
229           const uint64_t NewPage = TotalSize / HugePageSize;
230           if (NewPage != CurPage) {
231             CurPage = NewPage;
232             BC.outs() << format(
233                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
234           }
235         }
236       }
237     }
238   }
239   BC.outs() << "BOLT-INFO: Function reordering stats\n"
240             << format("BOLT-INFO:  Number of hot functions: %u\n"
241                       "BOLT-INFO:  Number of clusters: %lu\n",
242                       Hotfuncs, Clusters.size())
243             << format("BOLT-INFO:  Final average call distance = %.1lf "
244                       "(%.0lf / %.0lf)\n",
245                       TotalCalls ? TotalDistance / TotalCalls : 0,
246                       TotalDistance, TotalCalls)
247             << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
248   if (TotalCalls)
249     BC.outs()
250         << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
251                   TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
252         << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
253                   TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
254         << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
255                   TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
256 }
257 
258 Error ReorderFunctions::readFunctionOrderFile(
259     std::vector<std::string> &FunctionNames) {
260   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
261   if (!FuncsFile)
262     return createFatalBOLTError(Twine("Ordered functions file \"") +
263                                 Twine(opts::FunctionOrderFile) +
264                                 Twine("\" can't be opened."));
265 
266   std::string FuncName;
267   while (std::getline(FuncsFile, FuncName))
268     FunctionNames.push_back(FuncName);
269   return Error::success();
270 }
271 
272 Error ReorderFunctions::runOnFunctions(BinaryContext &BC) {
273   auto &BFs = BC.getBinaryFunctions();
274   if (opts::ReorderFunctions != RT_NONE &&
275       opts::ReorderFunctions != RT_EXEC_COUNT &&
276       opts::ReorderFunctions != RT_USER) {
277     Cg = buildCallGraph(
278         BC,
279         [](const BinaryFunction &BF) {
280           if (!BF.hasProfile())
281             return true;
282           if (BF.getState() != BinaryFunction::State::CFG)
283             return true;
284           return false;
285         },
286         opts::CgFromPerfData,
287         /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
288         opts::CgUseSplitHotSize, opts::UseEdgeCounts,
289         opts::CgIgnoreRecursiveCalls);
290     Cg.normalizeArcWeights();
291   }
292 
293   std::vector<Cluster> Clusters;
294 
295   switch (opts::ReorderFunctions) {
296   case RT_NONE:
297     break;
298   case RT_EXEC_COUNT: {
299     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
300     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
301                     [](BinaryFunction &BF) { return &BF; });
302     llvm::stable_sort(SortedFunctions,
303                       [&](const BinaryFunction *A, const BinaryFunction *B) {
304                         if (A->isIgnored())
305                           return false;
306                         if (B->isIgnored())
307                           return true;
308                         const size_t PadA = opts::padFunctionBefore(*A) +
309                                             opts::padFunctionAfter(*A);
310                         const size_t PadB = opts::padFunctionBefore(*B) +
311                                             opts::padFunctionAfter(*B);
312                         if (!PadA || !PadB) {
313                           if (PadA)
314                             return true;
315                           if (PadB)
316                             return false;
317                         }
318                         if (!A->hasProfile())
319                           return false;
320                         if (!B->hasProfile())
321                           return true;
322                         return A->getExecutionCount() > B->getExecutionCount();
323                       });
324     uint32_t Index = 0;
325     for (BinaryFunction *BF : SortedFunctions)
326       if (BF->hasProfile()) {
327         BF->setIndex(Index++);
328         LLVM_DEBUG(if (opts::Verbosity > 1) {
329           dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
330                  << BF->getExecutionCount() << ")\n";
331         });
332       }
333   } break;
334   case RT_HFSORT:
335     Clusters = clusterize(Cg);
336     break;
337   case RT_CDSORT: {
338     // It is required that the sum of incoming arc weights is not greater
339     // than the number of samples for every function. Ensuring the call graph
340     // obeys the property before running the algorithm.
341     Cg.adjustArcWeights();
342 
343     // Initialize CFG nodes and their data
344     std::vector<uint64_t> FuncSizes;
345     std::vector<uint64_t> FuncCounts;
346     std::vector<codelayout::EdgeCount> CallCounts;
347     std::vector<uint64_t> CallOffsets;
348     for (NodeId F = 0; F < Cg.numNodes(); ++F) {
349       FuncSizes.push_back(Cg.size(F));
350       FuncCounts.push_back(Cg.samples(F));
351       for (NodeId Succ : Cg.successors(F)) {
352         const Arc &Arc = *Cg.findArc(F, Succ);
353         CallCounts.push_back({F, Succ, uint64_t(Arc.weight())});
354         CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
355       }
356     }
357 
358     // Run the layout algorithm.
359     std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
360         FuncSizes, FuncCounts, CallCounts, CallOffsets);
361 
362     // Create a single cluster from the computed order of hot functions.
363     std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
364     Clusters.emplace_back(Cluster(NodeOrder, Cg));
365   } break;
366   case RT_PETTIS_HANSEN:
367     Clusters = pettisAndHansen(Cg);
368     break;
369   case RT_RANDOM:
370     std::srand(opts::RandomSeed);
371     Clusters = randomClusters(Cg);
372     break;
373   case RT_USER: {
374     // Build LTOCommonNameMap
375     StringMap<std::vector<uint64_t>> LTOCommonNameMap;
376     for (const BinaryFunction &BF : llvm::make_second_range(BFs))
377       for (StringRef Name : BF.getNames())
378         if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
379           LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
380 
381     uint32_t Index = 0;
382     uint32_t InvalidEntries = 0;
383     std::vector<std::string> FunctionNames;
384     if (Error E = readFunctionOrderFile(FunctionNames))
385       return Error(std::move(E));
386 
387     for (const std::string &Function : FunctionNames) {
388       std::vector<uint64_t> FuncAddrs;
389 
390       BinaryData *BD = BC.getBinaryDataByName(Function);
391       if (!BD) {
392         // If we can't find the main symbol name, look for alternates.
393         uint32_t LocalID = 1;
394         while (true) {
395           const std::string FuncName = Function + "/" + std::to_string(LocalID);
396           BD = BC.getBinaryDataByName(FuncName);
397           if (BD)
398             FuncAddrs.push_back(BD->getAddress());
399           else
400             break;
401           LocalID++;
402         }
403         // Strip LTO suffixes
404         if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
405           if (LTOCommonNameMap.contains(*CommonName))
406             llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
407       } else {
408         FuncAddrs.push_back(BD->getAddress());
409       }
410 
411       if (FuncAddrs.empty()) {
412         if (opts::Verbosity >= 1)
413           BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
414                     << "for " << Function << "\n";
415         ++InvalidEntries;
416         continue;
417       }
418 
419       for (const uint64_t FuncAddr : FuncAddrs) {
420         const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
421         assert(FuncBD);
422 
423         BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
424         if (!BF) {
425           if (opts::Verbosity >= 1)
426             BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
427                       << "for " << Function << "\n";
428           ++InvalidEntries;
429           break;
430         }
431         if (!BF->hasValidIndex())
432           BF->setIndex(Index++);
433         else if (opts::Verbosity > 0)
434           BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
435                     << "\n";
436       }
437     }
438     if (InvalidEntries)
439       BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
440                 << InvalidEntries << " entries in -function-order list\n";
441   } break;
442 
443   default:
444     llvm_unreachable("unexpected layout type");
445   }
446 
447   reorder(BC, std::move(Clusters), BFs);
448 
449   BC.HasFinalizedFunctionOrder = true;
450 
451   std::unique_ptr<std::ofstream> FuncsFile;
452   if (!opts::GenerateFunctionOrderFile.empty()) {
453     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
454                                                 std::ios::out);
455     if (!FuncsFile) {
456       BC.errs() << "BOLT-ERROR: ordered functions file "
457                 << opts::GenerateFunctionOrderFile << " cannot be opened\n";
458       return createFatalBOLTError("");
459     }
460   }
461 
462   std::unique_ptr<std::ofstream> LinkSectionsFile;
463   if (!opts::LinkSectionsFile.empty()) {
464     LinkSectionsFile =
465         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
466     if (!LinkSectionsFile) {
467       BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
468                 << " cannot be opened\n";
469       return createFatalBOLTError("");
470     }
471   }
472 
473   if (FuncsFile || LinkSectionsFile) {
474     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
475     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
476                     [](BinaryFunction &BF) { return &BF; });
477 
478     // Sort functions by index.
479     llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
480 
481     for (const BinaryFunction *Func : SortedFunctions) {
482       if (!Func->hasValidIndex())
483         break;
484       if (Func->isPLTFunction())
485         continue;
486 
487       if (FuncsFile)
488         *FuncsFile << Func->getOneName().str() << '\n';
489 
490       if (LinkSectionsFile) {
491         const char *Indent = "";
492         std::vector<StringRef> AllNames = Func->getNames();
493         llvm::sort(AllNames);
494         for (StringRef Name : AllNames) {
495           const size_t SlashPos = Name.find('/');
496           if (SlashPos != std::string::npos) {
497             // Avoid duplicates for local functions.
498             if (Name.find('/', SlashPos + 1) != std::string::npos)
499               continue;
500             Name = Name.substr(0, SlashPos);
501           }
502           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
503           Indent = " ";
504         }
505       }
506     }
507 
508     if (FuncsFile) {
509       FuncsFile->close();
510       BC.outs() << "BOLT-INFO: dumped function order to "
511                 << opts::GenerateFunctionOrderFile << '\n';
512     }
513 
514     if (LinkSectionsFile) {
515       LinkSectionsFile->close();
516       BC.outs() << "BOLT-INFO: dumped linker section order to "
517                 << opts::LinkSectionsFile << '\n';
518     }
519   }
520   return Error::success();
521 }
522 
523 } // namespace bolt
524 } // namespace llvm
525