xref: /llvm-project/bolt/lib/Passes/ReorderFunctions.cpp (revision 287508cd9c4396c8845d92310d258879202a179e)
1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include <fstream>
19 
20 #define DEBUG_TYPE "hfsort"
21 
22 using namespace llvm;
23 
24 namespace opts {
25 
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<unsigned> Verbosity;
28 extern cl::opt<uint32_t> RandomSeed;
29 
30 extern size_t padFunction(const bolt::BinaryFunction &Function);
31 
32 cl::opt<bolt::ReorderFunctions::ReorderType>
33 ReorderFunctions("reorder-functions",
34   cl::desc("reorder and cluster functions (works only with relocations)"),
35   cl::init(bolt::ReorderFunctions::RT_NONE),
36   cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE,
37       "none",
38       "do not reorder functions"),
39     clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT,
40       "exec-count",
41       "order by execution count"),
42     clEnumValN(bolt::ReorderFunctions::RT_HFSORT,
43       "hfsort",
44       "use hfsort algorithm"),
45     clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS,
46       "hfsort+",
47       "use hfsort+ algorithm"),
48     clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
49       "pettis-hansen",
50       "use Pettis-Hansen algorithm"),
51     clEnumValN(bolt::ReorderFunctions::RT_RANDOM,
52       "random",
53       "reorder functions randomly"),
54     clEnumValN(bolt::ReorderFunctions::RT_USER,
55       "user",
56       "use function order specified by -function-order")),
57   cl::ZeroOrMore,
58   cl::cat(BoltOptCategory));
59 
60 static cl::opt<bool> ReorderFunctionsUseHotSize(
61     "reorder-functions-use-hot-size",
62     cl::desc("use a function's hot size when doing clustering"), cl::init(true),
63     cl::cat(BoltOptCategory));
64 
65 static cl::opt<std::string>
66 FunctionOrderFile("function-order",
67   cl::desc("file containing an ordered list of functions to use for function "
68            "reordering"),
69   cl::cat(BoltOptCategory));
70 
71 static cl::opt<std::string>
72 GenerateFunctionOrderFile("generate-function-order",
73   cl::desc("file to dump the ordered list of functions to use for function "
74            "reordering"),
75   cl::cat(BoltOptCategory));
76 
77 static cl::opt<std::string>
78 LinkSectionsFile("generate-link-sections",
79   cl::desc("generate a list of function sections in a format suitable for "
80            "inclusion in a linker script"),
81   cl::cat(BoltOptCategory));
82 
83 static cl::opt<bool>
84     UseEdgeCounts("use-edge-counts",
85                   cl::desc("use edge count data when doing clustering"),
86                   cl::init(true), cl::cat(BoltOptCategory));
87 
88 static cl::opt<bool>
89 CgFromPerfData("cg-from-perf-data",
90   cl::desc("use perf data directly when constructing the call graph"
91            " for stale functions"),
92   cl::init(true),
93   cl::ZeroOrMore,
94   cl::cat(BoltOptCategory));
95 
96 static cl::opt<bool> CgIgnoreRecursiveCalls(
97     "cg-ignore-recursive-calls",
98     cl::desc("ignore recursive calls when constructing the call graph"),
99     cl::init(true), cl::cat(BoltOptCategory));
100 
101 static cl::opt<bool>
102 CgUseSplitHotSize("cg-use-split-hot-size",
103   cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
104            "call graph functions"),
105   cl::init(false),
106   cl::ZeroOrMore,
107   cl::cat(BoltOptCategory));
108 
109 } // namespace opts
110 
111 namespace llvm {
112 namespace bolt {
113 
114 using NodeId = CallGraph::NodeId;
115 using Arc = CallGraph::Arc;
116 using Node = CallGraph::Node;
117 
118 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
119                                std::map<uint64_t, BinaryFunction> &BFs) {
120   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
121   uint64_t TotalSize = 0;
122   uint32_t Index = 0;
123 
124   // Set order of hot functions based on clusters.
125   for (const Cluster &Cluster : Clusters) {
126     for (const NodeId FuncId : Cluster.targets()) {
127       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
128       FuncAddr[FuncId] = TotalSize;
129       TotalSize += Cg.size(FuncId);
130     }
131   }
132 
133   // Assign valid index for functions with valid profile.
134   for (auto &It : BFs) {
135     BinaryFunction &BF = It.second;
136     if (!BF.hasValidIndex() && BF.hasValidProfile())
137       BF.setIndex(Index++);
138   }
139 
140   if (opts::ReorderFunctions == RT_NONE)
141     return;
142 
143   printStats(Clusters, FuncAddr);
144 }
145 
146 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters,
147                                   const std::vector<uint64_t> &FuncAddr) {
148   if (opts::Verbosity == 0) {
149 #ifndef NDEBUG
150     if (!DebugFlag || !isCurrentDebugType("hfsort"))
151       return;
152 #else
153     return;
154 #endif
155   }
156 
157   bool PrintDetailed = opts::Verbosity > 1;
158 #ifndef NDEBUG
159   PrintDetailed |=
160     (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
161 #endif
162   uint64_t TotalSize   = 0;
163   uint64_t CurPage     = 0;
164   uint64_t Hotfuncs    = 0;
165   double TotalDistance = 0;
166   double TotalCalls    = 0;
167   double TotalCalls64B = 0;
168   double TotalCalls4KB = 0;
169   double TotalCalls2MB = 0;
170   if (PrintDetailed)
171     outs() << "BOLT-INFO: Function reordering page layout\n"
172            << "BOLT-INFO: ============== page 0 ==============\n";
173   for (const Cluster &Cluster : Clusters) {
174     if (PrintDetailed)
175       outs() << format(
176           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
177           Cluster.density(), Cluster.samples(), Cluster.size());
178 
179     for (NodeId FuncId : Cluster.targets()) {
180       if (Cg.samples(FuncId) > 0) {
181         Hotfuncs++;
182 
183         if (PrintDetailed)
184           outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
185                  << Cg.size(FuncId) << ")\n";
186 
187         uint64_t Dist = 0;
188         uint64_t Calls = 0;
189         for (NodeId Dst : Cg.successors(FuncId)) {
190           if (FuncId == Dst) // ignore recursive calls in stats
191             continue;
192           const Arc &Arc = *Cg.findArc(FuncId, Dst);
193           const auto D = std::abs(FuncAddr[Arc.dst()] -
194                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
195           const double W = Arc.weight();
196           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
197             outs() << "BOLT-INFO: short (" << D << "B) call:\n"
198                    << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
199                    << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
200                    << "BOLT-INFO:   Weight = " << W << "\n"
201                    << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset() << "\n";
202           Calls += W;
203           if (D < 64)        TotalCalls64B += W;
204           if (D < 4096)      TotalCalls4KB += W;
205           if (D < (2 << 20)) TotalCalls2MB += W;
206           Dist += Arc.weight() * D;
207           if (PrintDetailed)
208             outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
209                              "weight = %.0lf, callDist = %f\n",
210                              Arc.src(),
211                              FuncAddr[Arc.src()],
212                              Arc.avgCallOffset(),
213                              Arc.dst(),
214                              FuncAddr[Arc.dst()],
215                              Arc.weight(), D);
216         }
217         TotalCalls += Calls;
218         TotalDistance += Dist;
219         TotalSize += Cg.size(FuncId);
220 
221         if (PrintDetailed) {
222           outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
223                            TotalSize, Calls ? Dist / Calls : 0)
224                  << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
225           const uint64_t NewPage = TotalSize / HugePageSize;
226           if (NewPage != CurPage) {
227             CurPage = NewPage;
228             outs() << format(
229                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
230           }
231         }
232       }
233     }
234   }
235   outs() << "BOLT-INFO: Function reordering stats\n"
236          << format("BOLT-INFO:  Number of hot functions: %u\n"
237                    "BOLT-INFO:  Number of clusters: %lu\n",
238                    Hotfuncs, Clusters.size())
239          << format("BOLT-INFO:  Final average call distance = %.1lf "
240                    "(%.0lf / %.0lf)\n",
241                    TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
242                    TotalCalls)
243          << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
244   if (TotalCalls)
245     outs() << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
246                      TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
247            << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
248                      TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
249            << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
250                      TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
251 }
252 
253 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() {
254   std::vector<std::string> FunctionNames;
255   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
256   if (!FuncsFile) {
257     errs() << "Ordered functions file \"" << opts::FunctionOrderFile
258            << "\" can't be opened.\n";
259     exit(1);
260   }
261   std::string FuncName;
262   while (std::getline(FuncsFile, FuncName))
263     FunctionNames.push_back(FuncName);
264   return FunctionNames;
265 }
266 
267 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
268   auto &BFs = BC.getBinaryFunctions();
269   if (opts::ReorderFunctions != RT_NONE &&
270       opts::ReorderFunctions != RT_EXEC_COUNT &&
271       opts::ReorderFunctions != RT_USER) {
272     Cg = buildCallGraph(
273         BC,
274         [](const BinaryFunction &BF) {
275           if (!BF.hasProfile())
276             return true;
277           if (BF.getState() != BinaryFunction::State::CFG)
278             return true;
279           return false;
280         },
281         opts::CgFromPerfData,
282         /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
283         opts::CgUseSplitHotSize, opts::UseEdgeCounts,
284         opts::CgIgnoreRecursiveCalls);
285     Cg.normalizeArcWeights();
286   }
287 
288   std::vector<Cluster> Clusters;
289 
290   switch (opts::ReorderFunctions) {
291   case RT_NONE:
292     break;
293   case RT_EXEC_COUNT:
294     {
295       std::vector<BinaryFunction *> SortedFunctions(BFs.size());
296       uint32_t Index = 0;
297       llvm::transform(BFs, SortedFunctions.begin(),
298                       [](std::pair<const uint64_t, BinaryFunction> &BFI) {
299                         return &BFI.second;
300                       });
301       llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A,
302                                              const BinaryFunction *B) {
303         if (A->isIgnored())
304           return false;
305         const size_t PadA = opts::padFunction(*A);
306         const size_t PadB = opts::padFunction(*B);
307         if (!PadA || !PadB) {
308           if (PadA)
309             return true;
310           if (PadB)
311             return false;
312         }
313         return !A->hasProfile() &&
314                (B->hasProfile() ||
315                 (A->getExecutionCount() > B->getExecutionCount()));
316       });
317       for (BinaryFunction *BF : SortedFunctions)
318         if (BF->hasProfile())
319           BF->setIndex(Index++);
320     }
321     break;
322   case RT_HFSORT:
323     Clusters = clusterize(Cg);
324     break;
325   case RT_HFSORT_PLUS:
326     Clusters = hfsortPlus(Cg);
327     break;
328   case RT_PETTIS_HANSEN:
329     Clusters = pettisAndHansen(Cg);
330     break;
331   case RT_RANDOM:
332     std::srand(opts::RandomSeed);
333     Clusters = randomClusters(Cg);
334     break;
335   case RT_USER:
336     {
337       // Build LTOCommonNameMap
338       StringMap<std::vector<uint64_t>> LTOCommonNameMap;
339       for (const BinaryFunction &BF : llvm::make_second_range(BFs))
340         for (StringRef Name : BF.getNames())
341           if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
342             LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
343 
344       uint32_t Index = 0;
345       uint32_t InvalidEntries = 0;
346       for (const std::string &Function : readFunctionOrderFile()) {
347         std::vector<uint64_t> FuncAddrs;
348 
349         BinaryData *BD = BC.getBinaryDataByName(Function);
350         if (!BD) {
351           // If we can't find the main symbol name, look for alternates.
352           uint32_t LocalID = 1;
353           while (true) {
354             const std::string FuncName =
355                 Function + "/" + std::to_string(LocalID);
356             BD = BC.getBinaryDataByName(FuncName);
357             if (BD)
358               FuncAddrs.push_back(BD->getAddress());
359             else
360               break;
361             LocalID++;
362           }
363           // Strip LTO suffixes
364           if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
365             if (LTOCommonNameMap.find(*CommonName) != LTOCommonNameMap.end())
366               llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
367         } else {
368           FuncAddrs.push_back(BD->getAddress());
369         }
370 
371         if (FuncAddrs.empty()) {
372           if (opts::Verbosity >= 1)
373             errs() << "BOLT-WARNING: Reorder functions: can't find function "
374                    << "for " << Function << "\n";
375           ++InvalidEntries;
376           continue;
377         }
378 
379         for (const uint64_t FuncAddr : FuncAddrs) {
380           const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
381           assert(FuncBD);
382 
383           BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
384           if (!BF) {
385             if (opts::Verbosity >= 1)
386               errs() << "BOLT-WARNING: Reorder functions: can't find function "
387                      << "for " << Function << "\n";
388             ++InvalidEntries;
389             break;
390           }
391           if (!BF->hasValidIndex())
392             BF->setIndex(Index++);
393           else if (opts::Verbosity > 0)
394             errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
395                    << "\n";
396         }
397       }
398       if (InvalidEntries)
399         errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
400                << InvalidEntries << " entries in -function-order list\n";
401     }
402     break;
403   }
404 
405   reorder(std::move(Clusters), BFs);
406 
407   std::unique_ptr<std::ofstream> FuncsFile;
408   if (!opts::GenerateFunctionOrderFile.empty()) {
409     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
410                                                 std::ios::out);
411     if (!FuncsFile) {
412       errs() << "BOLT-ERROR: ordered functions file "
413              << opts::GenerateFunctionOrderFile << " cannot be opened\n";
414       exit(1);
415     }
416   }
417 
418   std::unique_ptr<std::ofstream> LinkSectionsFile;
419   if (!opts::LinkSectionsFile.empty()) {
420     LinkSectionsFile =
421         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
422     if (!LinkSectionsFile) {
423       errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
424              << " cannot be opened\n";
425       exit(1);
426     }
427   }
428 
429   if (FuncsFile || LinkSectionsFile) {
430     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
431     llvm::transform(BFs, SortedFunctions.begin(),
432                     [](std::pair<const uint64_t, BinaryFunction> &BFI) {
433                       return &BFI.second;
434                     });
435 
436     // Sort functions by index.
437     llvm::stable_sort(SortedFunctions,
438                       [](const BinaryFunction *A, const BinaryFunction *B) {
439                         if (A->hasValidIndex() && B->hasValidIndex())
440                           return A->getIndex() < B->getIndex();
441                         if (A->hasValidIndex() && !B->hasValidIndex())
442                           return true;
443                         if (!A->hasValidIndex() && B->hasValidIndex())
444                           return false;
445                         return A->getAddress() < B->getAddress();
446                       });
447 
448     for (const BinaryFunction *Func : SortedFunctions) {
449       if (!Func->hasValidIndex())
450         break;
451       if (Func->isPLTFunction())
452         continue;
453 
454       if (FuncsFile)
455         *FuncsFile << Func->getOneName().str() << '\n';
456 
457       if (LinkSectionsFile) {
458         const char *Indent = "";
459         std::vector<StringRef> AllNames = Func->getNames();
460         llvm::sort(AllNames);
461         for (StringRef Name : AllNames) {
462           const size_t SlashPos = Name.find('/');
463           if (SlashPos != std::string::npos) {
464             // Avoid duplicates for local functions.
465             if (Name.find('/', SlashPos + 1) != std::string::npos)
466               continue;
467             Name = Name.substr(0, SlashPos);
468           }
469           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
470           Indent = " ";
471         }
472       }
473     }
474 
475     if (FuncsFile) {
476       FuncsFile->close();
477       outs() << "BOLT-INFO: dumped function order to "
478              << opts::GenerateFunctionOrderFile << '\n';
479     }
480 
481     if (LinkSectionsFile) {
482       LinkSectionsFile->close();
483       outs() << "BOLT-INFO: dumped linker section order to "
484              << opts::LinkSectionsFile << '\n';
485     }
486   }
487 }
488 
489 } // namespace bolt
490 } // namespace llvm
491