xref: /llvm-project/bolt/lib/Passes/ReorderFunctions.cpp (revision 2b642926b40b1f5d2e9525332a60d9502a289383)
1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include <fstream>
19 
20 #define DEBUG_TYPE "hfsort"
21 
22 using namespace llvm;
23 
24 namespace opts {
25 
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<unsigned> Verbosity;
28 extern cl::opt<uint32_t> RandomSeed;
29 
30 extern size_t padFunction(const bolt::BinaryFunction &Function);
31 
32 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
33     "reorder-functions",
34     cl::desc("reorder and cluster functions (works only with relocations)"),
35     cl::init(bolt::ReorderFunctions::RT_NONE),
36     cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
37                           "do not reorder functions"),
38                clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
39                           "order by execution count"),
40                clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
41                           "use hfsort algorithm"),
42                clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
43                           "use hfsort+ algorithm"),
44                clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
45                           "pettis-hansen", "use Pettis-Hansen algorithm"),
46                clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
47                           "reorder functions randomly"),
48                clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
49                           "use function order specified by -function-order")),
50     cl::ZeroOrMore, cl::cat(BoltOptCategory));
51 
52 static cl::opt<bool> ReorderFunctionsUseHotSize(
53     "reorder-functions-use-hot-size",
54     cl::desc("use a function's hot size when doing clustering"), cl::init(true),
55     cl::cat(BoltOptCategory));
56 
57 static cl::opt<std::string> FunctionOrderFile(
58     "function-order",
59     cl::desc("file containing an ordered list of functions to use for function "
60              "reordering"),
61     cl::cat(BoltOptCategory));
62 
63 static cl::opt<std::string> GenerateFunctionOrderFile(
64     "generate-function-order",
65     cl::desc("file to dump the ordered list of functions to use for function "
66              "reordering"),
67     cl::cat(BoltOptCategory));
68 
69 static cl::opt<std::string> LinkSectionsFile(
70     "generate-link-sections",
71     cl::desc("generate a list of function sections in a format suitable for "
72              "inclusion in a linker script"),
73     cl::cat(BoltOptCategory));
74 
75 static cl::opt<bool>
76     UseEdgeCounts("use-edge-counts",
77                   cl::desc("use edge count data when doing clustering"),
78                   cl::init(true), cl::cat(BoltOptCategory));
79 
80 static cl::opt<bool> CgFromPerfData(
81     "cg-from-perf-data",
82     cl::desc("use perf data directly when constructing the call graph"
83              " for stale functions"),
84     cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
85 
86 static cl::opt<bool> CgIgnoreRecursiveCalls(
87     "cg-ignore-recursive-calls",
88     cl::desc("ignore recursive calls when constructing the call graph"),
89     cl::init(true), cl::cat(BoltOptCategory));
90 
91 static cl::opt<bool> CgUseSplitHotSize(
92     "cg-use-split-hot-size",
93     cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
94              "call graph functions"),
95     cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
96 
97 } // namespace opts
98 
99 namespace llvm {
100 namespace bolt {
101 
102 using NodeId = CallGraph::NodeId;
103 using Arc = CallGraph::Arc;
104 using Node = CallGraph::Node;
105 
106 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
107                                std::map<uint64_t, BinaryFunction> &BFs) {
108   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
109   uint64_t TotalSize = 0;
110   uint32_t Index = 0;
111 
112   // Set order of hot functions based on clusters.
113   for (const Cluster &Cluster : Clusters) {
114     for (const NodeId FuncId : Cluster.targets()) {
115       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
116       FuncAddr[FuncId] = TotalSize;
117       TotalSize += Cg.size(FuncId);
118     }
119   }
120 
121   // Assign valid index for functions with valid profile.
122   for (auto &It : BFs) {
123     BinaryFunction &BF = It.second;
124     if (!BF.hasValidIndex() && BF.hasValidProfile())
125       BF.setIndex(Index++);
126   }
127 
128   if (opts::ReorderFunctions == RT_NONE)
129     return;
130 
131   printStats(Clusters, FuncAddr);
132 }
133 
134 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters,
135                                   const std::vector<uint64_t> &FuncAddr) {
136   if (opts::Verbosity == 0) {
137 #ifndef NDEBUG
138     if (!DebugFlag || !isCurrentDebugType("hfsort"))
139       return;
140 #else
141     return;
142 #endif
143   }
144 
145   bool PrintDetailed = opts::Verbosity > 1;
146 #ifndef NDEBUG
147   PrintDetailed |=
148       (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
149 #endif
150   uint64_t TotalSize = 0;
151   uint64_t CurPage = 0;
152   uint64_t Hotfuncs = 0;
153   double TotalDistance = 0;
154   double TotalCalls = 0;
155   double TotalCalls64B = 0;
156   double TotalCalls4KB = 0;
157   double TotalCalls2MB = 0;
158   if (PrintDetailed)
159     outs() << "BOLT-INFO: Function reordering page layout\n"
160            << "BOLT-INFO: ============== page 0 ==============\n";
161   for (const Cluster &Cluster : Clusters) {
162     if (PrintDetailed)
163       outs() << format(
164           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
165           Cluster.density(), Cluster.samples(), Cluster.size());
166 
167     for (NodeId FuncId : Cluster.targets()) {
168       if (Cg.samples(FuncId) > 0) {
169         Hotfuncs++;
170 
171         if (PrintDetailed)
172           outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
173                  << Cg.size(FuncId) << ")\n";
174 
175         uint64_t Dist = 0;
176         uint64_t Calls = 0;
177         for (NodeId Dst : Cg.successors(FuncId)) {
178           if (FuncId == Dst) // ignore recursive calls in stats
179             continue;
180           const Arc &Arc = *Cg.findArc(FuncId, Dst);
181           const auto D = std::abs(FuncAddr[Arc.dst()] -
182                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
183           const double W = Arc.weight();
184           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
185             outs() << "BOLT-INFO: short (" << D << "B) call:\n"
186                    << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
187                    << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
188                    << "BOLT-INFO:   Weight = " << W << "\n"
189                    << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset()
190                    << "\n";
191           Calls += W;
192           if (D < 64)
193             TotalCalls64B += W;
194           if (D < 4096)
195             TotalCalls4KB += W;
196           if (D < (2 << 20))
197             TotalCalls2MB += W;
198           Dist += Arc.weight() * D;
199           if (PrintDetailed)
200             outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
201                              "weight = %.0lf, callDist = %f\n",
202                              Arc.src(), FuncAddr[Arc.src()],
203                              Arc.avgCallOffset(), Arc.dst(),
204                              FuncAddr[Arc.dst()], Arc.weight(), D);
205         }
206         TotalCalls += Calls;
207         TotalDistance += Dist;
208         TotalSize += Cg.size(FuncId);
209 
210         if (PrintDetailed) {
211           outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
212                            TotalSize, Calls ? Dist / Calls : 0)
213                  << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
214           const uint64_t NewPage = TotalSize / HugePageSize;
215           if (NewPage != CurPage) {
216             CurPage = NewPage;
217             outs() << format(
218                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
219           }
220         }
221       }
222     }
223   }
224   outs() << "BOLT-INFO: Function reordering stats\n"
225          << format("BOLT-INFO:  Number of hot functions: %u\n"
226                    "BOLT-INFO:  Number of clusters: %lu\n",
227                    Hotfuncs, Clusters.size())
228          << format("BOLT-INFO:  Final average call distance = %.1lf "
229                    "(%.0lf / %.0lf)\n",
230                    TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
231                    TotalCalls)
232          << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
233   if (TotalCalls)
234     outs() << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
235                      TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
236            << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
237                      TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
238            << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
239                      TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
240 }
241 
242 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() {
243   std::vector<std::string> FunctionNames;
244   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
245   if (!FuncsFile) {
246     errs() << "Ordered functions file \"" << opts::FunctionOrderFile
247            << "\" can't be opened.\n";
248     exit(1);
249   }
250   std::string FuncName;
251   while (std::getline(FuncsFile, FuncName))
252     FunctionNames.push_back(FuncName);
253   return FunctionNames;
254 }
255 
256 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
257   auto &BFs = BC.getBinaryFunctions();
258   if (opts::ReorderFunctions != RT_NONE &&
259       opts::ReorderFunctions != RT_EXEC_COUNT &&
260       opts::ReorderFunctions != RT_USER) {
261     Cg = buildCallGraph(
262         BC,
263         [](const BinaryFunction &BF) {
264           if (!BF.hasProfile())
265             return true;
266           if (BF.getState() != BinaryFunction::State::CFG)
267             return true;
268           return false;
269         },
270         opts::CgFromPerfData,
271         /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
272         opts::CgUseSplitHotSize, opts::UseEdgeCounts,
273         opts::CgIgnoreRecursiveCalls);
274     Cg.normalizeArcWeights();
275   }
276 
277   std::vector<Cluster> Clusters;
278 
279   switch (opts::ReorderFunctions) {
280   case RT_NONE:
281     break;
282   case RT_EXEC_COUNT: {
283     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
284     uint32_t Index = 0;
285     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
286                     [](BinaryFunction &BF) { return &BF; });
287     llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A,
288                                            const BinaryFunction *B) {
289       if (A->isIgnored())
290         return false;
291       const size_t PadA = opts::padFunction(*A);
292       const size_t PadB = opts::padFunction(*B);
293       if (!PadA || !PadB) {
294         if (PadA)
295           return true;
296         if (PadB)
297           return false;
298       }
299       return !A->hasProfile() && (B->hasProfile() || (A->getExecutionCount() >
300                                                       B->getExecutionCount()));
301     });
302     for (BinaryFunction *BF : SortedFunctions)
303       if (BF->hasProfile())
304         BF->setIndex(Index++);
305   } break;
306   case RT_HFSORT:
307     Clusters = clusterize(Cg);
308     break;
309   case RT_HFSORT_PLUS:
310     Clusters = hfsortPlus(Cg);
311     break;
312   case RT_PETTIS_HANSEN:
313     Clusters = pettisAndHansen(Cg);
314     break;
315   case RT_RANDOM:
316     std::srand(opts::RandomSeed);
317     Clusters = randomClusters(Cg);
318     break;
319   case RT_USER: {
320     // Build LTOCommonNameMap
321     StringMap<std::vector<uint64_t>> LTOCommonNameMap;
322     for (const BinaryFunction &BF : llvm::make_second_range(BFs))
323       for (StringRef Name : BF.getNames())
324         if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
325           LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
326 
327     uint32_t Index = 0;
328     uint32_t InvalidEntries = 0;
329     for (const std::string &Function : readFunctionOrderFile()) {
330       std::vector<uint64_t> FuncAddrs;
331 
332       BinaryData *BD = BC.getBinaryDataByName(Function);
333       if (!BD) {
334         // If we can't find the main symbol name, look for alternates.
335         uint32_t LocalID = 1;
336         while (true) {
337           const std::string FuncName = Function + "/" + std::to_string(LocalID);
338           BD = BC.getBinaryDataByName(FuncName);
339           if (BD)
340             FuncAddrs.push_back(BD->getAddress());
341           else
342             break;
343           LocalID++;
344         }
345         // Strip LTO suffixes
346         if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
347           if (LTOCommonNameMap.contains(*CommonName))
348             llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
349       } else {
350         FuncAddrs.push_back(BD->getAddress());
351       }
352 
353       if (FuncAddrs.empty()) {
354         if (opts::Verbosity >= 1)
355           errs() << "BOLT-WARNING: Reorder functions: can't find function "
356                  << "for " << Function << "\n";
357         ++InvalidEntries;
358         continue;
359       }
360 
361       for (const uint64_t FuncAddr : FuncAddrs) {
362         const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
363         assert(FuncBD);
364 
365         BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
366         if (!BF) {
367           if (opts::Verbosity >= 1)
368             errs() << "BOLT-WARNING: Reorder functions: can't find function "
369                    << "for " << Function << "\n";
370           ++InvalidEntries;
371           break;
372         }
373         if (!BF->hasValidIndex())
374           BF->setIndex(Index++);
375         else if (opts::Verbosity > 0)
376           errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
377                  << "\n";
378       }
379     }
380     if (InvalidEntries)
381       errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
382              << InvalidEntries << " entries in -function-order list\n";
383   } break;
384   }
385 
386   reorder(std::move(Clusters), BFs);
387 
388   std::unique_ptr<std::ofstream> FuncsFile;
389   if (!opts::GenerateFunctionOrderFile.empty()) {
390     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
391                                                 std::ios::out);
392     if (!FuncsFile) {
393       errs() << "BOLT-ERROR: ordered functions file "
394              << opts::GenerateFunctionOrderFile << " cannot be opened\n";
395       exit(1);
396     }
397   }
398 
399   std::unique_ptr<std::ofstream> LinkSectionsFile;
400   if (!opts::LinkSectionsFile.empty()) {
401     LinkSectionsFile =
402         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
403     if (!LinkSectionsFile) {
404       errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
405              << " cannot be opened\n";
406       exit(1);
407     }
408   }
409 
410   if (FuncsFile || LinkSectionsFile) {
411     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
412     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
413                     [](BinaryFunction &BF) { return &BF; });
414 
415     // Sort functions by index.
416     llvm::stable_sort(SortedFunctions,
417                       [](const BinaryFunction *A, const BinaryFunction *B) {
418                         if (A->hasValidIndex() && B->hasValidIndex())
419                           return A->getIndex() < B->getIndex();
420                         if (A->hasValidIndex() && !B->hasValidIndex())
421                           return true;
422                         if (!A->hasValidIndex() && B->hasValidIndex())
423                           return false;
424                         return A->getAddress() < B->getAddress();
425                       });
426 
427     for (const BinaryFunction *Func : SortedFunctions) {
428       if (!Func->hasValidIndex())
429         break;
430       if (Func->isPLTFunction())
431         continue;
432 
433       if (FuncsFile)
434         *FuncsFile << Func->getOneName().str() << '\n';
435 
436       if (LinkSectionsFile) {
437         const char *Indent = "";
438         std::vector<StringRef> AllNames = Func->getNames();
439         llvm::sort(AllNames);
440         for (StringRef Name : AllNames) {
441           const size_t SlashPos = Name.find('/');
442           if (SlashPos != std::string::npos) {
443             // Avoid duplicates for local functions.
444             if (Name.find('/', SlashPos + 1) != std::string::npos)
445               continue;
446             Name = Name.substr(0, SlashPos);
447           }
448           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
449           Indent = " ";
450         }
451       }
452     }
453 
454     if (FuncsFile) {
455       FuncsFile->close();
456       outs() << "BOLT-INFO: dumped function order to "
457              << opts::GenerateFunctionOrderFile << '\n';
458     }
459 
460     if (LinkSectionsFile) {
461       LinkSectionsFile->close();
462       outs() << "BOLT-INFO: dumped linker section order to "
463              << opts::LinkSectionsFile << '\n';
464     }
465   }
466 }
467 
468 } // namespace bolt
469 } // namespace llvm
470