xref: /llvm-project/bolt/lib/Passes/ReorderFunctions.cpp (revision 4e585e51c167f458c3a9072298c3d4e32a7fce31)
1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include <fstream>
19 
20 #define DEBUG_TYPE "hfsort"
21 
22 using namespace llvm;
23 
24 namespace opts {
25 
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<unsigned> Verbosity;
28 extern cl::opt<uint32_t> RandomSeed;
29 
30 extern size_t padFunction(const bolt::BinaryFunction &Function);
31 
32 cl::opt<bolt::ReorderFunctions::ReorderType>
33 ReorderFunctions("reorder-functions",
34   cl::desc("reorder and cluster functions (works only with relocations)"),
35   cl::init(bolt::ReorderFunctions::RT_NONE),
36   cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE,
37       "none",
38       "do not reorder functions"),
39     clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT,
40       "exec-count",
41       "order by execution count"),
42     clEnumValN(bolt::ReorderFunctions::RT_HFSORT,
43       "hfsort",
44       "use hfsort algorithm"),
45     clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS,
46       "hfsort+",
47       "use hfsort+ algorithm"),
48     clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
49       "pettis-hansen",
50       "use Pettis-Hansen algorithm"),
51     clEnumValN(bolt::ReorderFunctions::RT_RANDOM,
52       "random",
53       "reorder functions randomly"),
54     clEnumValN(bolt::ReorderFunctions::RT_USER,
55       "user",
56       "use function order specified by -function-order")),
57   cl::ZeroOrMore,
58   cl::cat(BoltOptCategory));
59 
60 static cl::opt<bool> ReorderFunctionsUseHotSize(
61     "reorder-functions-use-hot-size",
62     cl::desc("use a function's hot size when doing clustering"), cl::init(true),
63     cl::cat(BoltOptCategory));
64 
65 static cl::opt<std::string>
66 FunctionOrderFile("function-order",
67   cl::desc("file containing an ordered list of functions to use for function "
68            "reordering"),
69   cl::cat(BoltOptCategory));
70 
71 static cl::opt<std::string>
72 GenerateFunctionOrderFile("generate-function-order",
73   cl::desc("file to dump the ordered list of functions to use for function "
74            "reordering"),
75   cl::cat(BoltOptCategory));
76 
77 static cl::opt<std::string>
78 LinkSectionsFile("generate-link-sections",
79   cl::desc("generate a list of function sections in a format suitable for "
80            "inclusion in a linker script"),
81   cl::cat(BoltOptCategory));
82 
83 static cl::opt<bool>
84     UseEdgeCounts("use-edge-counts",
85                   cl::desc("use edge count data when doing clustering"),
86                   cl::init(true), cl::cat(BoltOptCategory));
87 
88 static cl::opt<bool>
89 CgFromPerfData("cg-from-perf-data",
90   cl::desc("use perf data directly when constructing the call graph"
91            " for stale functions"),
92   cl::init(true),
93   cl::ZeroOrMore,
94   cl::cat(BoltOptCategory));
95 
96 static cl::opt<bool> CgIgnoreRecursiveCalls(
97     "cg-ignore-recursive-calls",
98     cl::desc("ignore recursive calls when constructing the call graph"),
99     cl::init(true), cl::cat(BoltOptCategory));
100 
101 static cl::opt<bool>
102 CgUseSplitHotSize("cg-use-split-hot-size",
103   cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
104            "call graph functions"),
105   cl::init(false),
106   cl::ZeroOrMore,
107   cl::cat(BoltOptCategory));
108 
109 } // namespace opts
110 
111 namespace llvm {
112 namespace bolt {
113 
114 using NodeId = CallGraph::NodeId;
115 using Arc = CallGraph::Arc;
116 using Node = CallGraph::Node;
117 
118 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
119                                std::map<uint64_t, BinaryFunction> &BFs) {
120   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
121   uint64_t TotalSize = 0;
122   uint32_t Index = 0;
123 
124   // Set order of hot functions based on clusters.
125   for (const Cluster &Cluster : Clusters) {
126     for (const NodeId FuncId : Cluster.targets()) {
127       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
128       FuncAddr[FuncId] = TotalSize;
129       TotalSize += Cg.size(FuncId);
130     }
131   }
132 
133   // Assign valid index for functions with valid profile.
134   for (auto &It : BFs) {
135     BinaryFunction &BF = It.second;
136     if (!BF.hasValidIndex() && BF.hasValidProfile())
137       BF.setIndex(Index++);
138   }
139 
140   if (opts::ReorderFunctions == RT_NONE)
141     return;
142 
143   printStats(Clusters, FuncAddr);
144 }
145 
146 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters,
147                                   const std::vector<uint64_t> &FuncAddr) {
148   if (opts::Verbosity == 0) {
149 #ifndef NDEBUG
150     if (!DebugFlag || !isCurrentDebugType("hfsort"))
151       return;
152 #else
153     return;
154 #endif
155   }
156 
157   bool PrintDetailed = opts::Verbosity > 1;
158 #ifndef NDEBUG
159   PrintDetailed |=
160     (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
161 #endif
162   uint64_t TotalSize   = 0;
163   uint64_t CurPage     = 0;
164   uint64_t Hotfuncs    = 0;
165   double TotalDistance = 0;
166   double TotalCalls    = 0;
167   double TotalCalls64B = 0;
168   double TotalCalls4KB = 0;
169   double TotalCalls2MB = 0;
170   if (PrintDetailed)
171     outs() << "BOLT-INFO: Function reordering page layout\n"
172            << "BOLT-INFO: ============== page 0 ==============\n";
173   for (const Cluster &Cluster : Clusters) {
174     if (PrintDetailed)
175       outs() << format(
176           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
177           Cluster.density(), Cluster.samples(), Cluster.size());
178 
179     for (NodeId FuncId : Cluster.targets()) {
180       if (Cg.samples(FuncId) > 0) {
181         Hotfuncs++;
182 
183         if (PrintDetailed)
184           outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
185                  << Cg.size(FuncId) << ")\n";
186 
187         uint64_t Dist = 0;
188         uint64_t Calls = 0;
189         for (NodeId Dst : Cg.successors(FuncId)) {
190           if (FuncId == Dst) // ignore recursive calls in stats
191             continue;
192           const Arc &Arc = *Cg.findArc(FuncId, Dst);
193           const auto D = std::abs(FuncAddr[Arc.dst()] -
194                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
195           const double W = Arc.weight();
196           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
197             outs() << "BOLT-INFO: short (" << D << "B) call:\n"
198                    << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
199                    << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
200                    << "BOLT-INFO:   Weight = " << W << "\n"
201                    << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset() << "\n";
202           Calls += W;
203           if (D < 64)        TotalCalls64B += W;
204           if (D < 4096)      TotalCalls4KB += W;
205           if (D < (2 << 20)) TotalCalls2MB += W;
206           Dist += Arc.weight() * D;
207           if (PrintDetailed)
208             outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
209                              "weight = %.0lf, callDist = %f\n",
210                              Arc.src(),
211                              FuncAddr[Arc.src()],
212                              Arc.avgCallOffset(),
213                              Arc.dst(),
214                              FuncAddr[Arc.dst()],
215                              Arc.weight(), D);
216         }
217         TotalCalls += Calls;
218         TotalDistance += Dist;
219         TotalSize += Cg.size(FuncId);
220 
221         if (PrintDetailed) {
222           outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
223                            TotalSize, Calls ? Dist / Calls : 0)
224                  << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
225           const uint64_t NewPage = TotalSize / HugePageSize;
226           if (NewPage != CurPage) {
227             CurPage = NewPage;
228             outs() << format(
229                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
230           }
231         }
232       }
233     }
234   }
235   outs() << "BOLT-INFO: Function reordering stats\n"
236          << format("BOLT-INFO:  Number of hot functions: %u\n"
237                    "BOLT-INFO:  Number of clusters: %lu\n",
238                    Hotfuncs, Clusters.size())
239          << format("BOLT-INFO:  Final average call distance = %.1lf "
240                    "(%.0lf / %.0lf)\n",
241                    TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
242                    TotalCalls)
243          << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
244   if (TotalCalls)
245     outs() << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
246                      TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
247            << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
248                      TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
249            << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
250                      TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
251 }
252 
253 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() {
254   std::vector<std::string> FunctionNames;
255   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
256   if (!FuncsFile) {
257     errs() << "Ordered functions file \"" << opts::FunctionOrderFile
258            << "\" can't be opened.\n";
259     exit(1);
260   }
261   std::string FuncName;
262   while (std::getline(FuncsFile, FuncName))
263     FunctionNames.push_back(FuncName);
264   return FunctionNames;
265 }
266 
267 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
268   auto &BFs = BC.getBinaryFunctions();
269   if (opts::ReorderFunctions != RT_NONE &&
270       opts::ReorderFunctions != RT_EXEC_COUNT &&
271       opts::ReorderFunctions != RT_USER) {
272     Cg = buildCallGraph(
273         BC,
274         [](const BinaryFunction &BF) {
275           if (!BF.hasProfile())
276             return true;
277           if (BF.getState() != BinaryFunction::State::CFG)
278             return true;
279           return false;
280         },
281         opts::CgFromPerfData,
282         /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
283         opts::CgUseSplitHotSize, opts::UseEdgeCounts,
284         opts::CgIgnoreRecursiveCalls);
285     Cg.normalizeArcWeights();
286   }
287 
288   std::vector<Cluster> Clusters;
289 
290   switch (opts::ReorderFunctions) {
291   case RT_NONE:
292     break;
293   case RT_EXEC_COUNT:
294     {
295       std::vector<BinaryFunction *> SortedFunctions(BFs.size());
296       uint32_t Index = 0;
297       llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
298                       [](BinaryFunction &BF) { return &BF; });
299       llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A,
300                                              const BinaryFunction *B) {
301         if (A->isIgnored())
302           return false;
303         const size_t PadA = opts::padFunction(*A);
304         const size_t PadB = opts::padFunction(*B);
305         if (!PadA || !PadB) {
306           if (PadA)
307             return true;
308           if (PadB)
309             return false;
310         }
311         return !A->hasProfile() &&
312                (B->hasProfile() ||
313                 (A->getExecutionCount() > B->getExecutionCount()));
314       });
315       for (BinaryFunction *BF : SortedFunctions)
316         if (BF->hasProfile())
317           BF->setIndex(Index++);
318     }
319     break;
320   case RT_HFSORT:
321     Clusters = clusterize(Cg);
322     break;
323   case RT_HFSORT_PLUS:
324     Clusters = hfsortPlus(Cg);
325     break;
326   case RT_PETTIS_HANSEN:
327     Clusters = pettisAndHansen(Cg);
328     break;
329   case RT_RANDOM:
330     std::srand(opts::RandomSeed);
331     Clusters = randomClusters(Cg);
332     break;
333   case RT_USER:
334     {
335       // Build LTOCommonNameMap
336       StringMap<std::vector<uint64_t>> LTOCommonNameMap;
337       for (const BinaryFunction &BF : llvm::make_second_range(BFs))
338         for (StringRef Name : BF.getNames())
339           if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
340             LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
341 
342       uint32_t Index = 0;
343       uint32_t InvalidEntries = 0;
344       for (const std::string &Function : readFunctionOrderFile()) {
345         std::vector<uint64_t> FuncAddrs;
346 
347         BinaryData *BD = BC.getBinaryDataByName(Function);
348         if (!BD) {
349           // If we can't find the main symbol name, look for alternates.
350           uint32_t LocalID = 1;
351           while (true) {
352             const std::string FuncName =
353                 Function + "/" + std::to_string(LocalID);
354             BD = BC.getBinaryDataByName(FuncName);
355             if (BD)
356               FuncAddrs.push_back(BD->getAddress());
357             else
358               break;
359             LocalID++;
360           }
361           // Strip LTO suffixes
362           if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
363             if (LTOCommonNameMap.contains(*CommonName))
364               llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
365         } else {
366           FuncAddrs.push_back(BD->getAddress());
367         }
368 
369         if (FuncAddrs.empty()) {
370           if (opts::Verbosity >= 1)
371             errs() << "BOLT-WARNING: Reorder functions: can't find function "
372                    << "for " << Function << "\n";
373           ++InvalidEntries;
374           continue;
375         }
376 
377         for (const uint64_t FuncAddr : FuncAddrs) {
378           const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
379           assert(FuncBD);
380 
381           BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
382           if (!BF) {
383             if (opts::Verbosity >= 1)
384               errs() << "BOLT-WARNING: Reorder functions: can't find function "
385                      << "for " << Function << "\n";
386             ++InvalidEntries;
387             break;
388           }
389           if (!BF->hasValidIndex())
390             BF->setIndex(Index++);
391           else if (opts::Verbosity > 0)
392             errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
393                    << "\n";
394         }
395       }
396       if (InvalidEntries)
397         errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
398                << InvalidEntries << " entries in -function-order list\n";
399     }
400     break;
401   }
402 
403   reorder(std::move(Clusters), BFs);
404 
405   std::unique_ptr<std::ofstream> FuncsFile;
406   if (!opts::GenerateFunctionOrderFile.empty()) {
407     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
408                                                 std::ios::out);
409     if (!FuncsFile) {
410       errs() << "BOLT-ERROR: ordered functions file "
411              << opts::GenerateFunctionOrderFile << " cannot be opened\n";
412       exit(1);
413     }
414   }
415 
416   std::unique_ptr<std::ofstream> LinkSectionsFile;
417   if (!opts::LinkSectionsFile.empty()) {
418     LinkSectionsFile =
419         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
420     if (!LinkSectionsFile) {
421       errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
422              << " cannot be opened\n";
423       exit(1);
424     }
425   }
426 
427   if (FuncsFile || LinkSectionsFile) {
428     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
429     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
430                     [](BinaryFunction &BF) { return &BF; });
431 
432     // Sort functions by index.
433     llvm::stable_sort(SortedFunctions,
434                       [](const BinaryFunction *A, const BinaryFunction *B) {
435                         if (A->hasValidIndex() && B->hasValidIndex())
436                           return A->getIndex() < B->getIndex();
437                         if (A->hasValidIndex() && !B->hasValidIndex())
438                           return true;
439                         if (!A->hasValidIndex() && B->hasValidIndex())
440                           return false;
441                         return A->getAddress() < B->getAddress();
442                       });
443 
444     for (const BinaryFunction *Func : SortedFunctions) {
445       if (!Func->hasValidIndex())
446         break;
447       if (Func->isPLTFunction())
448         continue;
449 
450       if (FuncsFile)
451         *FuncsFile << Func->getOneName().str() << '\n';
452 
453       if (LinkSectionsFile) {
454         const char *Indent = "";
455         std::vector<StringRef> AllNames = Func->getNames();
456         llvm::sort(AllNames);
457         for (StringRef Name : AllNames) {
458           const size_t SlashPos = Name.find('/');
459           if (SlashPos != std::string::npos) {
460             // Avoid duplicates for local functions.
461             if (Name.find('/', SlashPos + 1) != std::string::npos)
462               continue;
463             Name = Name.substr(0, SlashPos);
464           }
465           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
466           Indent = " ";
467         }
468       }
469     }
470 
471     if (FuncsFile) {
472       FuncsFile->close();
473       outs() << "BOLT-INFO: dumped function order to "
474              << opts::GenerateFunctionOrderFile << '\n';
475     }
476 
477     if (LinkSectionsFile) {
478       LinkSectionsFile->close();
479       outs() << "BOLT-INFO: dumped linker section order to "
480              << opts::LinkSectionsFile << '\n';
481     }
482   }
483 }
484 
485 } // namespace bolt
486 } // namespace llvm
487