xref: /llvm-project/bolt/lib/Passes/ReorderFunctions.cpp (revision 14dcf8214f9c66172d17c1cfaec6aec0030748e0)
1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Transforms/Utils/CodeLayout.h"
19 #include <fstream>
20 
21 #define DEBUG_TYPE "hfsort"
22 
23 using namespace llvm;
24 
25 namespace opts {
26 
27 extern cl::OptionCategory BoltOptCategory;
28 extern cl::opt<unsigned> Verbosity;
29 extern cl::opt<uint32_t> RandomSeed;
30 
31 extern size_t padFunction(const cl::list<std::string> &Spec,
32                           const bolt::BinaryFunction &Function);
33 extern cl::list<std::string> FunctionPadSpec, FunctionPadBeforeSpec;
34 
35 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
36 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
37     "reorder-functions",
38     cl::desc("reorder and cluster functions (works only with relocations)"),
39     cl::init(bolt::ReorderFunctions::RT_NONE),
40     cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
41                           "do not reorder functions"),
42                clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
43                           "order by execution count"),
44                clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
45                           "use hfsort algorithm"),
46                clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
47                           "use cache-directed sort"),
48                clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort",
49                           "use cache-directed sort"),
50                clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
51                           "pettis-hansen", "use Pettis-Hansen algorithm"),
52                clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
53                           "reorder functions randomly"),
54                clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
55                           "use function order specified by -function-order")),
56     cl::ZeroOrMore, cl::cat(BoltOptCategory),
57     cl::callback([](const bolt::ReorderFunctions::ReorderType &option) {
58       if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) {
59         errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
60                << " please use '-reorder-functions=cdsort' instead\n";
61         ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT;
62       }
63     }));
64 
65 static cl::opt<bool> ReorderFunctionsUseHotSize(
66     "reorder-functions-use-hot-size",
67     cl::desc("use a function's hot size when doing clustering"), cl::init(true),
68     cl::cat(BoltOptCategory));
69 
70 static cl::opt<std::string> FunctionOrderFile(
71     "function-order",
72     cl::desc("file containing an ordered list of functions to use for function "
73              "reordering"),
74     cl::cat(BoltOptCategory));
75 
76 static cl::opt<std::string> GenerateFunctionOrderFile(
77     "generate-function-order",
78     cl::desc("file to dump the ordered list of functions to use for function "
79              "reordering"),
80     cl::cat(BoltOptCategory));
81 
82 static cl::opt<std::string> LinkSectionsFile(
83     "generate-link-sections",
84     cl::desc("generate a list of function sections in a format suitable for "
85              "inclusion in a linker script"),
86     cl::cat(BoltOptCategory));
87 
88 static cl::opt<bool>
89     UseEdgeCounts("use-edge-counts",
90                   cl::desc("use edge count data when doing clustering"),
91                   cl::init(true), cl::cat(BoltOptCategory));
92 
93 static cl::opt<bool> CgFromPerfData(
94     "cg-from-perf-data",
95     cl::desc("use perf data directly when constructing the call graph"
96              " for stale functions"),
97     cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
98 
99 static cl::opt<bool> CgIgnoreRecursiveCalls(
100     "cg-ignore-recursive-calls",
101     cl::desc("ignore recursive calls when constructing the call graph"),
102     cl::init(true), cl::cat(BoltOptCategory));
103 
104 static cl::opt<bool> CgUseSplitHotSize(
105     "cg-use-split-hot-size",
106     cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
107              "call graph functions"),
108     cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
109 
110 } // namespace opts
111 
112 namespace llvm {
113 namespace bolt {
114 
115 using NodeId = CallGraph::NodeId;
116 using Arc = CallGraph::Arc;
117 using Node = CallGraph::Node;
118 
119 void ReorderFunctions::reorder(BinaryContext &BC,
120                                std::vector<Cluster> &&Clusters,
121                                std::map<uint64_t, BinaryFunction> &BFs) {
122   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
123   uint64_t TotalSize = 0;
124   uint32_t Index = 0;
125 
126   // Set order of hot functions based on clusters.
127   for (const Cluster &Cluster : Clusters) {
128     for (const NodeId FuncId : Cluster.targets()) {
129       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
130       FuncAddr[FuncId] = TotalSize;
131       TotalSize += Cg.size(FuncId);
132     }
133   }
134 
135   // Assign valid index for functions with valid profile.
136   for (auto &It : BFs) {
137     BinaryFunction &BF = It.second;
138     if (!BF.hasValidIndex() && BF.hasValidProfile())
139       BF.setIndex(Index++);
140   }
141 
142   if (opts::ReorderFunctions == RT_NONE)
143     return;
144 
145   printStats(BC, Clusters, FuncAddr);
146 }
147 
148 void ReorderFunctions::printStats(BinaryContext &BC,
149                                   const std::vector<Cluster> &Clusters,
150                                   const std::vector<uint64_t> &FuncAddr) {
151   if (opts::Verbosity == 0) {
152 #ifndef NDEBUG
153     if (!DebugFlag || !isCurrentDebugType("hfsort"))
154       return;
155 #else
156     return;
157 #endif
158   }
159 
160   bool PrintDetailed = opts::Verbosity > 1;
161 #ifndef NDEBUG
162   PrintDetailed |=
163       (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
164 #endif
165   uint64_t TotalSize = 0;
166   uint64_t CurPage = 0;
167   uint64_t Hotfuncs = 0;
168   double TotalDistance = 0;
169   double TotalCalls = 0;
170   double TotalCalls64B = 0;
171   double TotalCalls4KB = 0;
172   double TotalCalls2MB = 0;
173   if (PrintDetailed)
174     BC.outs() << "BOLT-INFO: Function reordering page layout\n"
175               << "BOLT-INFO: ============== page 0 ==============\n";
176   for (const Cluster &Cluster : Clusters) {
177     if (PrintDetailed)
178       BC.outs() << format(
179           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
180           Cluster.density(), Cluster.samples(), Cluster.size());
181 
182     for (NodeId FuncId : Cluster.targets()) {
183       if (Cg.samples(FuncId) > 0) {
184         Hotfuncs++;
185 
186         if (PrintDetailed)
187           BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId)
188                     << " (" << Cg.size(FuncId) << ")\n";
189 
190         uint64_t Dist = 0;
191         uint64_t Calls = 0;
192         for (NodeId Dst : Cg.successors(FuncId)) {
193           if (FuncId == Dst) // ignore recursive calls in stats
194             continue;
195           const Arc &Arc = *Cg.findArc(FuncId, Dst);
196           const auto D = std::abs(FuncAddr[Arc.dst()] -
197                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
198           const double W = Arc.weight();
199           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
200             BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n"
201                       << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId)
202                       << "\n"
203                       << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
204                       << "BOLT-INFO:   Weight = " << W << "\n"
205                       << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset()
206                       << "\n";
207           Calls += W;
208           if (D < 64)
209             TotalCalls64B += W;
210           if (D < 4096)
211             TotalCalls4KB += W;
212           if (D < (2 << 20))
213             TotalCalls2MB += W;
214           Dist += Arc.weight() * D;
215           if (PrintDetailed)
216             BC.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
217                                 "weight = %.0lf, callDist = %f\n",
218                                 Arc.src(), FuncAddr[Arc.src()],
219                                 Arc.avgCallOffset(), Arc.dst(),
220                                 FuncAddr[Arc.dst()], Arc.weight(), D);
221         }
222         TotalCalls += Calls;
223         TotalDistance += Dist;
224         TotalSize += Cg.size(FuncId);
225 
226         if (PrintDetailed) {
227           BC.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
228                               TotalSize, Calls ? Dist / Calls : 0)
229                     << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
230           const uint64_t NewPage = TotalSize / HugePageSize;
231           if (NewPage != CurPage) {
232             CurPage = NewPage;
233             BC.outs() << format(
234                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
235           }
236         }
237       }
238     }
239   }
240   BC.outs() << "BOLT-INFO: Function reordering stats\n"
241             << format("BOLT-INFO:  Number of hot functions: %u\n"
242                       "BOLT-INFO:  Number of clusters: %lu\n",
243                       Hotfuncs, Clusters.size())
244             << format("BOLT-INFO:  Final average call distance = %.1lf "
245                       "(%.0lf / %.0lf)\n",
246                       TotalCalls ? TotalDistance / TotalCalls : 0,
247                       TotalDistance, TotalCalls)
248             << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
249   if (TotalCalls)
250     BC.outs()
251         << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
252                   TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
253         << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
254                   TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
255         << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
256                   TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
257 }
258 
259 Error ReorderFunctions::readFunctionOrderFile(
260     std::vector<std::string> &FunctionNames) {
261   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
262   if (!FuncsFile)
263     return createFatalBOLTError(Twine("Ordered functions file \"") +
264                                 Twine(opts::FunctionOrderFile) +
265                                 Twine("\" can't be opened."));
266 
267   std::string FuncName;
268   while (std::getline(FuncsFile, FuncName))
269     FunctionNames.push_back(FuncName);
270   return Error::success();
271 }
272 
273 Error ReorderFunctions::runOnFunctions(BinaryContext &BC) {
274   auto &BFs = BC.getBinaryFunctions();
275   if (opts::ReorderFunctions != RT_NONE &&
276       opts::ReorderFunctions != RT_EXEC_COUNT &&
277       opts::ReorderFunctions != RT_USER) {
278     Cg = buildCallGraph(
279         BC,
280         [](const BinaryFunction &BF) {
281           if (!BF.hasProfile())
282             return true;
283           if (BF.getState() != BinaryFunction::State::CFG)
284             return true;
285           return false;
286         },
287         opts::CgFromPerfData,
288         /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
289         opts::CgUseSplitHotSize, opts::UseEdgeCounts,
290         opts::CgIgnoreRecursiveCalls);
291     Cg.normalizeArcWeights();
292   }
293 
294   std::vector<Cluster> Clusters;
295 
296   switch (opts::ReorderFunctions) {
297   case RT_NONE:
298     break;
299   case RT_EXEC_COUNT: {
300     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
301     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
302                     [](BinaryFunction &BF) { return &BF; });
303     llvm::stable_sort(SortedFunctions,
304                       [&](const BinaryFunction *A, const BinaryFunction *B) {
305                         if (A->isIgnored())
306                           return false;
307                         if (B->isIgnored())
308                           return true;
309                         const size_t PadA =
310                             opts::padFunction(opts::FunctionPadSpec, *A) +
311                             opts::padFunction(opts::FunctionPadBeforeSpec, *A);
312                         const size_t PadB =
313                             opts::padFunction(opts::FunctionPadSpec, *B) +
314                             opts::padFunction(opts::FunctionPadBeforeSpec, *B);
315                         if (!PadA || !PadB) {
316                           if (PadA)
317                             return true;
318                           if (PadB)
319                             return false;
320                         }
321                         if (!A->hasProfile())
322                           return false;
323                         if (!B->hasProfile())
324                           return true;
325                         return A->getExecutionCount() > B->getExecutionCount();
326                       });
327     uint32_t Index = 0;
328     for (BinaryFunction *BF : SortedFunctions)
329       if (BF->hasProfile()) {
330         BF->setIndex(Index++);
331         LLVM_DEBUG(if (opts::Verbosity > 1) {
332           dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
333                  << BF->getExecutionCount() << ")\n";
334         });
335       }
336   } break;
337   case RT_HFSORT:
338     Clusters = clusterize(Cg);
339     break;
340   case RT_CDSORT: {
341     // It is required that the sum of incoming arc weights is not greater
342     // than the number of samples for every function. Ensuring the call graph
343     // obeys the property before running the algorithm.
344     Cg.adjustArcWeights();
345 
346     // Initialize CFG nodes and their data
347     std::vector<uint64_t> FuncSizes;
348     std::vector<uint64_t> FuncCounts;
349     std::vector<codelayout::EdgeCount> CallCounts;
350     std::vector<uint64_t> CallOffsets;
351     for (NodeId F = 0; F < Cg.numNodes(); ++F) {
352       FuncSizes.push_back(Cg.size(F));
353       FuncCounts.push_back(Cg.samples(F));
354       for (NodeId Succ : Cg.successors(F)) {
355         const Arc &Arc = *Cg.findArc(F, Succ);
356         CallCounts.push_back({F, Succ, uint64_t(Arc.weight())});
357         CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
358       }
359     }
360 
361     // Run the layout algorithm.
362     std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
363         FuncSizes, FuncCounts, CallCounts, CallOffsets);
364 
365     // Create a single cluster from the computed order of hot functions.
366     std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
367     Clusters.emplace_back(Cluster(NodeOrder, Cg));
368   } break;
369   case RT_PETTIS_HANSEN:
370     Clusters = pettisAndHansen(Cg);
371     break;
372   case RT_RANDOM:
373     std::srand(opts::RandomSeed);
374     Clusters = randomClusters(Cg);
375     break;
376   case RT_USER: {
377     // Build LTOCommonNameMap
378     StringMap<std::vector<uint64_t>> LTOCommonNameMap;
379     for (const BinaryFunction &BF : llvm::make_second_range(BFs))
380       for (StringRef Name : BF.getNames())
381         if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
382           LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
383 
384     uint32_t Index = 0;
385     uint32_t InvalidEntries = 0;
386     std::vector<std::string> FunctionNames;
387     if (Error E = readFunctionOrderFile(FunctionNames))
388       return Error(std::move(E));
389 
390     for (const std::string &Function : FunctionNames) {
391       std::vector<uint64_t> FuncAddrs;
392 
393       BinaryData *BD = BC.getBinaryDataByName(Function);
394       if (!BD) {
395         // If we can't find the main symbol name, look for alternates.
396         uint32_t LocalID = 1;
397         while (true) {
398           const std::string FuncName = Function + "/" + std::to_string(LocalID);
399           BD = BC.getBinaryDataByName(FuncName);
400           if (BD)
401             FuncAddrs.push_back(BD->getAddress());
402           else
403             break;
404           LocalID++;
405         }
406         // Strip LTO suffixes
407         if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
408           if (LTOCommonNameMap.contains(*CommonName))
409             llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
410       } else {
411         FuncAddrs.push_back(BD->getAddress());
412       }
413 
414       if (FuncAddrs.empty()) {
415         if (opts::Verbosity >= 1)
416           BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
417                     << "for " << Function << "\n";
418         ++InvalidEntries;
419         continue;
420       }
421 
422       for (const uint64_t FuncAddr : FuncAddrs) {
423         const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
424         assert(FuncBD);
425 
426         BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
427         if (!BF) {
428           if (opts::Verbosity >= 1)
429             BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
430                       << "for " << Function << "\n";
431           ++InvalidEntries;
432           break;
433         }
434         if (!BF->hasValidIndex())
435           BF->setIndex(Index++);
436         else if (opts::Verbosity > 0)
437           BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
438                     << "\n";
439       }
440     }
441     if (InvalidEntries)
442       BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
443                 << InvalidEntries << " entries in -function-order list\n";
444   } break;
445 
446   default:
447     llvm_unreachable("unexpected layout type");
448   }
449 
450   reorder(BC, std::move(Clusters), BFs);
451 
452   BC.HasFinalizedFunctionOrder = true;
453 
454   std::unique_ptr<std::ofstream> FuncsFile;
455   if (!opts::GenerateFunctionOrderFile.empty()) {
456     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
457                                                 std::ios::out);
458     if (!FuncsFile) {
459       BC.errs() << "BOLT-ERROR: ordered functions file "
460                 << opts::GenerateFunctionOrderFile << " cannot be opened\n";
461       return createFatalBOLTError("");
462     }
463   }
464 
465   std::unique_ptr<std::ofstream> LinkSectionsFile;
466   if (!opts::LinkSectionsFile.empty()) {
467     LinkSectionsFile =
468         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
469     if (!LinkSectionsFile) {
470       BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
471                 << " cannot be opened\n";
472       return createFatalBOLTError("");
473     }
474   }
475 
476   if (FuncsFile || LinkSectionsFile) {
477     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
478     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
479                     [](BinaryFunction &BF) { return &BF; });
480 
481     // Sort functions by index.
482     llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
483 
484     for (const BinaryFunction *Func : SortedFunctions) {
485       if (!Func->hasValidIndex())
486         break;
487       if (Func->isPLTFunction())
488         continue;
489 
490       if (FuncsFile)
491         *FuncsFile << Func->getOneName().str() << '\n';
492 
493       if (LinkSectionsFile) {
494         const char *Indent = "";
495         std::vector<StringRef> AllNames = Func->getNames();
496         llvm::sort(AllNames);
497         for (StringRef Name : AllNames) {
498           const size_t SlashPos = Name.find('/');
499           if (SlashPos != std::string::npos) {
500             // Avoid duplicates for local functions.
501             if (Name.find('/', SlashPos + 1) != std::string::npos)
502               continue;
503             Name = Name.substr(0, SlashPos);
504           }
505           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
506           Indent = " ";
507         }
508       }
509     }
510 
511     if (FuncsFile) {
512       FuncsFile->close();
513       BC.outs() << "BOLT-INFO: dumped function order to "
514                 << opts::GenerateFunctionOrderFile << '\n';
515     }
516 
517     if (LinkSectionsFile) {
518       LinkSectionsFile->close();
519       BC.outs() << "BOLT-INFO: dumped linker section order to "
520                 << opts::LinkSectionsFile << '\n';
521     }
522   }
523   return Error::success();
524 }
525 
526 } // namespace bolt
527 } // namespace llvm
528