xref: /llvm-project/bolt/lib/Passes/ReorderFunctions.cpp (revision 9460ebd13077feb164ea1687ce1d7c31000e8bef)
1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Transforms/Utils/CodeLayout.h"
19 #include <fstream>
20 
21 #define DEBUG_TYPE "hfsort"
22 
23 using namespace llvm;
24 
25 namespace opts {
26 
27 extern cl::OptionCategory BoltOptCategory;
28 extern cl::opt<unsigned> Verbosity;
29 extern cl::opt<uint32_t> RandomSeed;
30 
31 extern size_t padFunction(const bolt::BinaryFunction &Function);
32 
33 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
34     "reorder-functions",
35     cl::desc("reorder and cluster functions (works only with relocations)"),
36     cl::init(bolt::ReorderFunctions::RT_NONE),
37     cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
38                           "do not reorder functions"),
39                clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
40                           "order by execution count"),
41                clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
42                           "use hfsort algorithm"),
43                clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
44                           "use hfsort+ algorithm"),
45                clEnumValN(bolt::ReorderFunctions::RT_CDS, "cds",
46                           "use cache-directed sort"),
47                clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
48                           "pettis-hansen", "use Pettis-Hansen algorithm"),
49                clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
50                           "reorder functions randomly"),
51                clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
52                           "use function order specified by -function-order")),
53     cl::ZeroOrMore, cl::cat(BoltOptCategory));
54 
55 static cl::opt<bool> ReorderFunctionsUseHotSize(
56     "reorder-functions-use-hot-size",
57     cl::desc("use a function's hot size when doing clustering"), cl::init(true),
58     cl::cat(BoltOptCategory));
59 
60 static cl::opt<std::string> FunctionOrderFile(
61     "function-order",
62     cl::desc("file containing an ordered list of functions to use for function "
63              "reordering"),
64     cl::cat(BoltOptCategory));
65 
66 static cl::opt<std::string> GenerateFunctionOrderFile(
67     "generate-function-order",
68     cl::desc("file to dump the ordered list of functions to use for function "
69              "reordering"),
70     cl::cat(BoltOptCategory));
71 
72 static cl::opt<std::string> LinkSectionsFile(
73     "generate-link-sections",
74     cl::desc("generate a list of function sections in a format suitable for "
75              "inclusion in a linker script"),
76     cl::cat(BoltOptCategory));
77 
78 static cl::opt<bool>
79     UseEdgeCounts("use-edge-counts",
80                   cl::desc("use edge count data when doing clustering"),
81                   cl::init(true), cl::cat(BoltOptCategory));
82 
83 static cl::opt<bool> CgFromPerfData(
84     "cg-from-perf-data",
85     cl::desc("use perf data directly when constructing the call graph"
86              " for stale functions"),
87     cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
88 
89 static cl::opt<bool> CgIgnoreRecursiveCalls(
90     "cg-ignore-recursive-calls",
91     cl::desc("ignore recursive calls when constructing the call graph"),
92     cl::init(true), cl::cat(BoltOptCategory));
93 
94 static cl::opt<bool> CgUseSplitHotSize(
95     "cg-use-split-hot-size",
96     cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
97              "call graph functions"),
98     cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
99 
100 } // namespace opts
101 
102 namespace llvm {
103 namespace bolt {
104 
105 using NodeId = CallGraph::NodeId;
106 using Arc = CallGraph::Arc;
107 using Node = CallGraph::Node;
108 
109 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
110                                std::map<uint64_t, BinaryFunction> &BFs) {
111   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
112   uint64_t TotalSize = 0;
113   uint32_t Index = 0;
114 
115   // Set order of hot functions based on clusters.
116   for (const Cluster &Cluster : Clusters) {
117     for (const NodeId FuncId : Cluster.targets()) {
118       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
119       FuncAddr[FuncId] = TotalSize;
120       TotalSize += Cg.size(FuncId);
121     }
122   }
123 
124   // Assign valid index for functions with valid profile.
125   for (auto &It : BFs) {
126     BinaryFunction &BF = It.second;
127     if (!BF.hasValidIndex() && BF.hasValidProfile())
128       BF.setIndex(Index++);
129   }
130 
131   if (opts::ReorderFunctions == RT_NONE)
132     return;
133 
134   printStats(Clusters, FuncAddr);
135 }
136 
137 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters,
138                                   const std::vector<uint64_t> &FuncAddr) {
139   if (opts::Verbosity == 0) {
140 #ifndef NDEBUG
141     if (!DebugFlag || !isCurrentDebugType("hfsort"))
142       return;
143 #else
144     return;
145 #endif
146   }
147 
148   bool PrintDetailed = opts::Verbosity > 1;
149 #ifndef NDEBUG
150   PrintDetailed |=
151       (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
152 #endif
153   uint64_t TotalSize = 0;
154   uint64_t CurPage = 0;
155   uint64_t Hotfuncs = 0;
156   double TotalDistance = 0;
157   double TotalCalls = 0;
158   double TotalCalls64B = 0;
159   double TotalCalls4KB = 0;
160   double TotalCalls2MB = 0;
161   if (PrintDetailed)
162     outs() << "BOLT-INFO: Function reordering page layout\n"
163            << "BOLT-INFO: ============== page 0 ==============\n";
164   for (const Cluster &Cluster : Clusters) {
165     if (PrintDetailed)
166       outs() << format(
167           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
168           Cluster.density(), Cluster.samples(), Cluster.size());
169 
170     for (NodeId FuncId : Cluster.targets()) {
171       if (Cg.samples(FuncId) > 0) {
172         Hotfuncs++;
173 
174         if (PrintDetailed)
175           outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
176                  << Cg.size(FuncId) << ")\n";
177 
178         uint64_t Dist = 0;
179         uint64_t Calls = 0;
180         for (NodeId Dst : Cg.successors(FuncId)) {
181           if (FuncId == Dst) // ignore recursive calls in stats
182             continue;
183           const Arc &Arc = *Cg.findArc(FuncId, Dst);
184           const auto D = std::abs(FuncAddr[Arc.dst()] -
185                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
186           const double W = Arc.weight();
187           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
188             outs() << "BOLT-INFO: short (" << D << "B) call:\n"
189                    << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
190                    << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
191                    << "BOLT-INFO:   Weight = " << W << "\n"
192                    << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset()
193                    << "\n";
194           Calls += W;
195           if (D < 64)
196             TotalCalls64B += W;
197           if (D < 4096)
198             TotalCalls4KB += W;
199           if (D < (2 << 20))
200             TotalCalls2MB += W;
201           Dist += Arc.weight() * D;
202           if (PrintDetailed)
203             outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
204                              "weight = %.0lf, callDist = %f\n",
205                              Arc.src(), FuncAddr[Arc.src()],
206                              Arc.avgCallOffset(), Arc.dst(),
207                              FuncAddr[Arc.dst()], Arc.weight(), D);
208         }
209         TotalCalls += Calls;
210         TotalDistance += Dist;
211         TotalSize += Cg.size(FuncId);
212 
213         if (PrintDetailed) {
214           outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
215                            TotalSize, Calls ? Dist / Calls : 0)
216                  << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
217           const uint64_t NewPage = TotalSize / HugePageSize;
218           if (NewPage != CurPage) {
219             CurPage = NewPage;
220             outs() << format(
221                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
222           }
223         }
224       }
225     }
226   }
227   outs() << "BOLT-INFO: Function reordering stats\n"
228          << format("BOLT-INFO:  Number of hot functions: %u\n"
229                    "BOLT-INFO:  Number of clusters: %lu\n",
230                    Hotfuncs, Clusters.size())
231          << format("BOLT-INFO:  Final average call distance = %.1lf "
232                    "(%.0lf / %.0lf)\n",
233                    TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
234                    TotalCalls)
235          << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
236   if (TotalCalls)
237     outs() << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
238                      TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
239            << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
240                      TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
241            << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
242                      TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
243 }
244 
245 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() {
246   std::vector<std::string> FunctionNames;
247   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
248   if (!FuncsFile) {
249     errs() << "Ordered functions file \"" << opts::FunctionOrderFile
250            << "\" can't be opened.\n";
251     exit(1);
252   }
253   std::string FuncName;
254   while (std::getline(FuncsFile, FuncName))
255     FunctionNames.push_back(FuncName);
256   return FunctionNames;
257 }
258 
259 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
260   auto &BFs = BC.getBinaryFunctions();
261   if (opts::ReorderFunctions != RT_NONE &&
262       opts::ReorderFunctions != RT_EXEC_COUNT &&
263       opts::ReorderFunctions != RT_USER) {
264     Cg = buildCallGraph(
265         BC,
266         [](const BinaryFunction &BF) {
267           if (!BF.hasProfile())
268             return true;
269           if (BF.getState() != BinaryFunction::State::CFG)
270             return true;
271           return false;
272         },
273         opts::CgFromPerfData,
274         /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
275         opts::CgUseSplitHotSize, opts::UseEdgeCounts,
276         opts::CgIgnoreRecursiveCalls);
277     Cg.normalizeArcWeights();
278   }
279 
280   std::vector<Cluster> Clusters;
281 
282   switch (opts::ReorderFunctions) {
283   case RT_NONE:
284     break;
285   case RT_EXEC_COUNT: {
286     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
287     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
288                     [](BinaryFunction &BF) { return &BF; });
289     llvm::stable_sort(SortedFunctions,
290                       [&](const BinaryFunction *A, const BinaryFunction *B) {
291                         if (A->isIgnored())
292                           return false;
293                         if (B->isIgnored())
294                           return true;
295                         const size_t PadA = opts::padFunction(*A);
296                         const size_t PadB = opts::padFunction(*B);
297                         if (!PadA || !PadB) {
298                           if (PadA)
299                             return true;
300                           if (PadB)
301                             return false;
302                         }
303                         if (!A->hasProfile())
304                           return false;
305                         if (!B->hasProfile())
306                           return true;
307                         return A->getExecutionCount() > B->getExecutionCount();
308                       });
309     uint32_t Index = 0;
310     for (BinaryFunction *BF : SortedFunctions)
311       if (BF->hasProfile()) {
312         BF->setIndex(Index++);
313         LLVM_DEBUG(if (opts::Verbosity > 1) {
314           dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
315                  << BF->getExecutionCount() << ")\n";
316         });
317       }
318   } break;
319   case RT_HFSORT:
320     Clusters = clusterize(Cg);
321     break;
322   case RT_HFSORT_PLUS:
323     Clusters = hfsortPlus(Cg);
324     break;
325   case RT_CDS: {
326     // It is required that the sum of incoming arc weights is not greater
327     // than the number of samples for every function. Ensuring the call graph
328     // obeys the property before running the algorithm.
329     Cg.adjustArcWeights();
330 
331     // Initialize CFG nodes and their data
332     std::vector<uint64_t> FuncSizes;
333     std::vector<uint64_t> FuncCounts;
334     using JumpT = std::pair<uint64_t, uint64_t>;
335     std::vector<std::pair<JumpT, uint64_t>> CallCounts;
336     std::vector<uint64_t> CallOffsets;
337     for (NodeId F = 0; F < Cg.numNodes(); ++F) {
338       FuncSizes.push_back(Cg.size(F));
339       FuncCounts.push_back(Cg.samples(F));
340       for (NodeId Succ : Cg.successors(F)) {
341         const Arc &Arc = *Cg.findArc(F, Succ);
342         auto It = std::make_pair(F, Succ);
343         CallCounts.push_back(std::make_pair(It, Arc.weight()));
344         CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
345       }
346     }
347 
348     // Run the layout algorithm.
349     std::vector<uint64_t> Result =
350         applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets);
351 
352     // Create a single cluster from the computed order of hot functions.
353     std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
354     Clusters.emplace_back(Cluster(NodeOrder, Cg));
355   } break;
356   case RT_PETTIS_HANSEN:
357     Clusters = pettisAndHansen(Cg);
358     break;
359   case RT_RANDOM:
360     std::srand(opts::RandomSeed);
361     Clusters = randomClusters(Cg);
362     break;
363   case RT_USER: {
364     // Build LTOCommonNameMap
365     StringMap<std::vector<uint64_t>> LTOCommonNameMap;
366     for (const BinaryFunction &BF : llvm::make_second_range(BFs))
367       for (StringRef Name : BF.getNames())
368         if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
369           LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
370 
371     uint32_t Index = 0;
372     uint32_t InvalidEntries = 0;
373     for (const std::string &Function : readFunctionOrderFile()) {
374       std::vector<uint64_t> FuncAddrs;
375 
376       BinaryData *BD = BC.getBinaryDataByName(Function);
377       if (!BD) {
378         // If we can't find the main symbol name, look for alternates.
379         uint32_t LocalID = 1;
380         while (true) {
381           const std::string FuncName = Function + "/" + std::to_string(LocalID);
382           BD = BC.getBinaryDataByName(FuncName);
383           if (BD)
384             FuncAddrs.push_back(BD->getAddress());
385           else
386             break;
387           LocalID++;
388         }
389         // Strip LTO suffixes
390         if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
391           if (LTOCommonNameMap.contains(*CommonName))
392             llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
393       } else {
394         FuncAddrs.push_back(BD->getAddress());
395       }
396 
397       if (FuncAddrs.empty()) {
398         if (opts::Verbosity >= 1)
399           errs() << "BOLT-WARNING: Reorder functions: can't find function "
400                  << "for " << Function << "\n";
401         ++InvalidEntries;
402         continue;
403       }
404 
405       for (const uint64_t FuncAddr : FuncAddrs) {
406         const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
407         assert(FuncBD);
408 
409         BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
410         if (!BF) {
411           if (opts::Verbosity >= 1)
412             errs() << "BOLT-WARNING: Reorder functions: can't find function "
413                    << "for " << Function << "\n";
414           ++InvalidEntries;
415           break;
416         }
417         if (!BF->hasValidIndex())
418           BF->setIndex(Index++);
419         else if (opts::Verbosity > 0)
420           errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
421                  << "\n";
422       }
423     }
424     if (InvalidEntries)
425       errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
426              << InvalidEntries << " entries in -function-order list\n";
427   } break;
428   }
429 
430   reorder(std::move(Clusters), BFs);
431 
432   std::unique_ptr<std::ofstream> FuncsFile;
433   if (!opts::GenerateFunctionOrderFile.empty()) {
434     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
435                                                 std::ios::out);
436     if (!FuncsFile) {
437       errs() << "BOLT-ERROR: ordered functions file "
438              << opts::GenerateFunctionOrderFile << " cannot be opened\n";
439       exit(1);
440     }
441   }
442 
443   std::unique_ptr<std::ofstream> LinkSectionsFile;
444   if (!opts::LinkSectionsFile.empty()) {
445     LinkSectionsFile =
446         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
447     if (!LinkSectionsFile) {
448       errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
449              << " cannot be opened\n";
450       exit(1);
451     }
452   }
453 
454   if (FuncsFile || LinkSectionsFile) {
455     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
456     llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
457                     [](BinaryFunction &BF) { return &BF; });
458 
459     // Sort functions by index.
460     llvm::stable_sort(SortedFunctions,
461                       [](const BinaryFunction *A, const BinaryFunction *B) {
462                         if (A->hasValidIndex() && B->hasValidIndex())
463                           return A->getIndex() < B->getIndex();
464                         if (A->hasValidIndex() && !B->hasValidIndex())
465                           return true;
466                         if (!A->hasValidIndex() && B->hasValidIndex())
467                           return false;
468                         return A->getAddress() < B->getAddress();
469                       });
470 
471     for (const BinaryFunction *Func : SortedFunctions) {
472       if (!Func->hasValidIndex())
473         break;
474       if (Func->isPLTFunction())
475         continue;
476 
477       if (FuncsFile)
478         *FuncsFile << Func->getOneName().str() << '\n';
479 
480       if (LinkSectionsFile) {
481         const char *Indent = "";
482         std::vector<StringRef> AllNames = Func->getNames();
483         llvm::sort(AllNames);
484         for (StringRef Name : AllNames) {
485           const size_t SlashPos = Name.find('/');
486           if (SlashPos != std::string::npos) {
487             // Avoid duplicates for local functions.
488             if (Name.find('/', SlashPos + 1) != std::string::npos)
489               continue;
490             Name = Name.substr(0, SlashPos);
491           }
492           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
493           Indent = " ";
494         }
495       }
496     }
497 
498     if (FuncsFile) {
499       FuncsFile->close();
500       outs() << "BOLT-INFO: dumped function order to "
501              << opts::GenerateFunctionOrderFile << '\n';
502     }
503 
504     if (LinkSectionsFile) {
505       LinkSectionsFile->close();
506       outs() << "BOLT-INFO: dumped linker section order to "
507              << opts::LinkSectionsFile << '\n';
508     }
509   }
510 }
511 
512 } // namespace bolt
513 } // namespace llvm
514