xref: /llvm-project/bolt/lib/Rewrite/BinaryPassManager.cpp (revision a5f3d1a803020167bd9d494a8a3921e7dcc1550a)
1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FixRISCVCallsPass.h"
16 #include "bolt/Passes/FixRelaxationPass.h"
17 #include "bolt/Passes/FrameOptimizer.h"
18 #include "bolt/Passes/Hugify.h"
19 #include "bolt/Passes/IdenticalCodeFolding.h"
20 #include "bolt/Passes/IndirectCallPromotion.h"
21 #include "bolt/Passes/Inliner.h"
22 #include "bolt/Passes/Instrumentation.h"
23 #include "bolt/Passes/JTFootprintReduction.h"
24 #include "bolt/Passes/LongJmp.h"
25 #include "bolt/Passes/LoopInversionPass.h"
26 #include "bolt/Passes/PLTCall.h"
27 #include "bolt/Passes/PatchEntries.h"
28 #include "bolt/Passes/RegReAssign.h"
29 #include "bolt/Passes/ReorderData.h"
30 #include "bolt/Passes/ReorderFunctions.h"
31 #include "bolt/Passes/RetpolineInsertion.h"
32 #include "bolt/Passes/SplitFunctions.h"
33 #include "bolt/Passes/StokeInfo.h"
34 #include "bolt/Passes/TailDuplication.h"
35 #include "bolt/Passes/ThreeWayBranch.h"
36 #include "bolt/Passes/ValidateInternalCalls.h"
37 #include "bolt/Passes/ValidateMemRefs.h"
38 #include "bolt/Passes/VeneerElimination.h"
39 #include "bolt/Utils/CommandLineOpts.h"
40 #include "llvm/Support/FormatVariadic.h"
41 #include "llvm/Support/Timer.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <memory>
44 #include <numeric>
45 
46 using namespace llvm;
47 
48 namespace opts {
49 
50 extern cl::opt<bool> PrintAll;
51 extern cl::opt<bool> PrintDynoStats;
52 extern cl::opt<bool> DumpDotAll;
53 extern cl::opt<std::string> AsmDump;
54 extern cl::opt<bolt::PLTCall::OptType> PLT;
55 
56 static cl::opt<bool>
57 DynoStatsAll("dyno-stats-all",
58   cl::desc("print dyno stats after each stage"),
59   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
60 
61 static cl::opt<bool>
62     EliminateUnreachable("eliminate-unreachable",
63                          cl::desc("eliminate unreachable code"), cl::init(true),
64                          cl::cat(BoltOptCategory));
65 
66 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
67                   cl::cat(BoltOptCategory));
68 
69 static cl::opt<bool> JTFootprintReductionFlag(
70     "jt-footprint-reduction",
71     cl::desc("make jump tables size smaller at the cost of using more "
72              "instructions at jump sites"),
73     cl::cat(BoltOptCategory));
74 
75 static cl::opt<bool>
76     KeepNops("keep-nops",
77              cl::desc("keep no-op instructions. By default they are removed."),
78              cl::Hidden, cl::cat(BoltOptCategory));
79 
80 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
81                          cl::ReallyHidden, cl::cat(BoltOptCategory));
82 
83 cl::opt<bool>
84 PrintAfterBranchFixup("print-after-branch-fixup",
85   cl::desc("print function after fixing local branches"),
86   cl::Hidden, cl::cat(BoltOptCategory));
87 
88 static cl::opt<bool>
89 PrintAfterLowering("print-after-lowering",
90   cl::desc("print function after instruction lowering"),
91   cl::Hidden, cl::cat(BoltOptCategory));
92 
93 cl::opt<bool>
94 PrintFinalized("print-finalized",
95   cl::desc("print function after CFG is finalized"),
96   cl::Hidden, cl::cat(BoltOptCategory));
97 
98 static cl::opt<bool>
99     PrintFOP("print-fop",
100              cl::desc("print functions after frame optimizer pass"), cl::Hidden,
101              cl::cat(BoltOptCategory));
102 
103 static cl::opt<bool>
104     PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
105              cl::Hidden, cl::cat(BoltOptCategory));
106 
107 static cl::opt<bool>
108     PrintICP("print-icp",
109              cl::desc("print functions after indirect call promotion"),
110              cl::Hidden, cl::cat(BoltOptCategory));
111 
112 static cl::opt<bool>
113     PrintInline("print-inline",
114                 cl::desc("print functions after inlining optimization"),
115                 cl::Hidden, cl::cat(BoltOptCategory));
116 
117 static cl::opt<bool> PrintJTFootprintReduction(
118     "print-after-jt-footprint-reduction",
119     cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
120     cl::cat(BoltOptCategory));
121 
122 static cl::opt<bool>
123     PrintLongJmp("print-longjmp",
124                  cl::desc("print functions after longjmp pass"), cl::Hidden,
125                  cl::cat(BoltOptCategory));
126 
127 cl::opt<bool>
128     PrintNormalized("print-normalized",
129                     cl::desc("print functions after CFG is normalized"),
130                     cl::Hidden, cl::cat(BoltCategory));
131 
132 static cl::opt<bool> PrintOptimizeBodyless(
133     "print-optimize-bodyless",
134     cl::desc("print functions after bodyless optimization"), cl::Hidden,
135     cl::cat(BoltOptCategory));
136 
137 static cl::opt<bool>
138     PrintPeepholes("print-peepholes",
139                    cl::desc("print functions after peephole optimization"),
140                    cl::Hidden, cl::cat(BoltOptCategory));
141 
142 static cl::opt<bool>
143     PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
144              cl::Hidden, cl::cat(BoltOptCategory));
145 
146 static cl::opt<bool>
147     PrintProfileStats("print-profile-stats",
148                       cl::desc("print profile quality/bias analysis"),
149                       cl::cat(BoltCategory));
150 
151 static cl::opt<bool>
152     PrintRegReAssign("print-regreassign",
153                      cl::desc("print functions after regreassign pass"),
154                      cl::Hidden, cl::cat(BoltOptCategory));
155 
156 cl::opt<bool>
157     PrintReordered("print-reordered",
158                    cl::desc("print functions after layout optimization"),
159                    cl::Hidden, cl::cat(BoltOptCategory));
160 
161 static cl::opt<bool>
162     PrintReorderedFunctions("print-reordered-functions",
163                             cl::desc("print functions after clustering"),
164                             cl::Hidden, cl::cat(BoltOptCategory));
165 
166 static cl::opt<bool> PrintRetpolineInsertion(
167     "print-retpoline-insertion",
168     cl::desc("print functions after retpoline insertion pass"), cl::Hidden,
169     cl::cat(BoltCategory));
170 
171 static cl::opt<bool> PrintSCTC(
172     "print-sctc",
173     cl::desc("print functions after conditional tail call simplification"),
174     cl::Hidden, cl::cat(BoltOptCategory));
175 
176 static cl::opt<bool> PrintSimplifyROLoads(
177     "print-simplify-rodata-loads",
178     cl::desc("print functions after simplification of RO data loads"),
179     cl::Hidden, cl::cat(BoltOptCategory));
180 
181 static cl::opt<bool>
182     PrintSplit("print-split", cl::desc("print functions after code splitting"),
183                cl::Hidden, cl::cat(BoltOptCategory));
184 
185 static cl::opt<bool>
186     PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
187                cl::Hidden, cl::cat(BoltOptCategory));
188 
189 static cl::opt<bool>
190     PrintFixRelaxations("print-fix-relaxations",
191                         cl::desc("print functions after fix relaxations pass"),
192                         cl::Hidden, cl::cat(BoltOptCategory));
193 
194 static cl::opt<bool>
195     PrintFixRISCVCalls("print-fix-riscv-calls",
196                        cl::desc("print functions after fix RISCV calls pass"),
197                        cl::Hidden, cl::cat(BoltOptCategory));
198 
199 static cl::opt<bool> PrintVeneerElimination(
200     "print-veneer-elimination",
201     cl::desc("print functions after veneer elimination pass"), cl::Hidden,
202     cl::cat(BoltOptCategory));
203 
204 static cl::opt<bool>
205     PrintUCE("print-uce",
206              cl::desc("print functions after unreachable code elimination"),
207              cl::Hidden, cl::cat(BoltOptCategory));
208 
209 static cl::opt<bool> RegReAssign(
210     "reg-reassign",
211     cl::desc(
212         "reassign registers so as to avoid using REX prefixes in hot code"),
213     cl::cat(BoltOptCategory));
214 
215 static cl::opt<bool> SimplifyConditionalTailCalls(
216     "simplify-conditional-tail-calls",
217     cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
218     cl::init(true), cl::cat(BoltOptCategory));
219 
220 static cl::opt<bool> SimplifyRODataLoads(
221     "simplify-rodata-loads",
222     cl::desc("simplify loads from read-only sections by replacing the memory "
223              "operand with the constant found in the corresponding section"),
224     cl::cat(BoltOptCategory));
225 
226 static cl::list<std::string>
227 SpecializeMemcpy1("memcpy1-spec",
228   cl::desc("list of functions with call sites for which to specialize memcpy() "
229            "for size 1"),
230   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
231   cl::ZeroOrMore, cl::cat(BoltOptCategory));
232 
233 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
234                            cl::cat(BoltOptCategory));
235 
236 static cl::opt<bool> StringOps(
237     "inline-memcpy",
238     cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
239     cl::cat(BoltOptCategory));
240 
241 static cl::opt<bool> StripRepRet(
242     "strip-rep-ret",
243     cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
244     cl::init(true), cl::cat(BoltOptCategory));
245 
246 static cl::opt<bool> VerifyCFG("verify-cfg",
247                                cl::desc("verify the CFG after every pass"),
248                                cl::Hidden, cl::cat(BoltOptCategory));
249 
250 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
251                                         cl::desc("reorder three way branches"),
252                                         cl::ReallyHidden,
253                                         cl::cat(BoltOptCategory));
254 
255 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
256                                         cl::desc("fold jcc+mov into cmov"),
257                                         cl::ReallyHidden,
258                                         cl::cat(BoltOptCategory));
259 
260 } // namespace opts
261 
262 namespace llvm {
263 namespace bolt {
264 
265 using namespace opts;
266 
267 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
268 const char BinaryFunctionPassManager::TimerGroupDesc[] =
269     "Binary Function Pass Manager";
270 
271 void BinaryFunctionPassManager::runPasses() {
272   auto &BFs = BC.getBinaryFunctions();
273   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
274     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
275         &OptPassPair = Passes[PassIdx];
276     if (!OptPassPair.first)
277       continue;
278 
279     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
280     std::string PassIdName =
281         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
282 
283     if (opts::Verbosity > 0)
284       outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
285 
286     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
287                        TimerGroupDesc, TimeOpts);
288 
289     callWithDynoStats([this, &Pass] { cantFail(Pass->runOnFunctions(BC)); },
290                       BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
291 
292     if (opts::VerifyCFG &&
293         !std::accumulate(
294             BFs.begin(), BFs.end(), true,
295             [](const bool Valid,
296                const std::pair<const uint64_t, BinaryFunction> &It) {
297               return Valid && It.second.validateCFG();
298             })) {
299       errs() << "BOLT-ERROR: Invalid CFG detected after pass "
300              << Pass->getName() << "\n";
301       exit(1);
302     }
303 
304     if (opts::Verbosity > 0)
305       outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
306 
307     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
308       continue;
309 
310     const std::string Message = std::string("after ") + Pass->getName();
311 
312     for (auto &It : BFs) {
313       BinaryFunction &Function = It.second;
314 
315       if (!Pass->shouldPrint(Function))
316         continue;
317 
318       Function.print(outs(), Message);
319 
320       if (opts::DumpDotAll)
321         Function.dumpGraphForPass(PassIdName);
322     }
323   }
324 }
325 
326 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
327   BinaryFunctionPassManager Manager(BC);
328 
329   const DynoStats InitialDynoStats =
330       getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
331 
332   Manager.registerPass(std::make_unique<AsmDumpPass>(),
333                        opts::AsmDump.getNumOccurrences());
334 
335   if (BC.isAArch64()) {
336     Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));
337 
338     Manager.registerPass(
339         std::make_unique<VeneerElimination>(PrintVeneerElimination));
340   }
341 
342   if (BC.isRISCV()) {
343     Manager.registerPass(
344         std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls));
345   }
346 
347   // Here we manage dependencies/order manually, since passes are run in the
348   // order they're registered.
349 
350   // Run this pass first to use stats for the original functions.
351   Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));
352 
353   if (opts::PrintProfileStats)
354     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
355 
356   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
357 
358   Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
359 
360   if (opts::Instrument)
361     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
362   else if (opts::Hugify)
363     Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
364 
365   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
366 
367   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
368                        !opts::KeepNops);
369 
370   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
371 
372   Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
373                        opts::StripRepRet);
374 
375   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
376                        opts::ICF);
377 
378   Manager.registerPass(
379       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
380       !opts::SpecializeMemcpy1.empty());
381 
382   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
383                        opts::StringOps);
384 
385   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
386 
387   Manager.registerPass(
388       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
389       opts::JTFootprintReductionFlag);
390 
391   Manager.registerPass(
392       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
393       opts::SimplifyRODataLoads);
394 
395   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
396                        opts::RegReAssign);
397 
398   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
399 
400   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
401                        opts::ICF);
402 
403   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
404 
405   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
406                        opts::ThreeWayBranchFlag);
407 
408   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
409 
410   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
411                        opts::EliminateUnreachable);
412 
413   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
414 
415   Manager.registerPass(std::make_unique<LoopInversionPass>());
416 
417   Manager.registerPass(std::make_unique<TailDuplication>());
418 
419   Manager.registerPass(std::make_unique<CMOVConversion>(),
420                        opts::CMOVConversionFlag);
421 
422   // This pass syncs local branches with CFG. If any of the following
423   // passes breaks the sync - they either need to re-run the pass or
424   // fix branches consistency internally.
425   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
426 
427   // This pass should come close to last since it uses the estimated hot
428   // size of a function to determine the order.  It should definitely
429   // also happen after any changes to the call graph are made, e.g. inlining.
430   Manager.registerPass(
431       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
432 
433   // This is the second run of the SplitFunctions pass required by certain
434   // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
435   // after ReorderFunctions allows the finalized function order to be utilized
436   // to make more sophisticated splitting decisions, like hot-warm-cold
437   // splitting.
438   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
439 
440   // Print final dyno stats right while CFG and instruction analysis are intact.
441   Manager.registerPass(
442       std::make_unique<DynoStatsPrintPass>(
443           InitialDynoStats, "after all optimizations before SCTC and FOP"),
444       opts::PrintDynoStats || opts::DynoStatsAll);
445 
446   // Add the StokeInfo pass, which extract functions for stoke optimization and
447   // get the liveness information for them
448   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
449 
450   // This pass introduces conditional jumps into external functions.
451   // Between extending CFG to support this and isolating this pass we chose
452   // the latter. Thus this pass will do double jump removal and unreachable
453   // code elimination if necessary and won't rely on peepholes/UCE for these
454   // optimizations.
455   // More generally this pass should be the last optimization pass that
456   // modifies branches/control flow.  This pass is run after function
457   // reordering so that it can tell whether calls are forward/backward
458   // accurately.
459   Manager.registerPass(
460       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
461       opts::SimplifyConditionalTailCalls);
462 
463   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
464 
465   Manager.registerPass(std::make_unique<AlignerPass>());
466 
467   // Perform reordering on data contained in one or more sections using
468   // memory profiling data.
469   Manager.registerPass(std::make_unique<ReorderData>());
470 
471   if (BC.isAArch64()) {
472     Manager.registerPass(std::make_unique<ADRRelaxationPass>());
473 
474     // Tighten branches according to offset differences between branch and
475     // targets. No extra instructions after this pass, otherwise we may have
476     // relocations out of range and crash during linking.
477     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
478   }
479 
480   // This pass should always run last.*
481   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
482 
483   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
484   // FrameOptimizer move values around and needs to update CFIs. To do this, it
485   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
486   // placed according to the final layout.
487   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
488 
489   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
490 
491   Manager.registerPass(
492       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
493 
494   // Assign each function an output section.
495   Manager.registerPass(std::make_unique<AssignSections>());
496 
497   // Patch original function entries
498   if (BC.HasRelocations)
499     Manager.registerPass(std::make_unique<PatchEntries>());
500 
501   // This pass turns tail calls into jumps which makes them invisible to
502   // function reordering. It's unsafe to use any CFG or instruction analysis
503   // after this point.
504   Manager.registerPass(
505       std::make_unique<InstructionLowering>(PrintAfterLowering));
506 
507   // In non-relocation mode, mark functions that do not fit into their original
508   // space as non-simple if we have to (e.g. for correct debug info update).
509   // NOTE: this pass depends on finalized code.
510   if (!BC.HasRelocations)
511     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
512 
513   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
514 
515   // Check for dirty state of MCSymbols caused by running calculateEmittedSize
516   // in parallel and restore them
517   Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint));
518 
519   Manager.runPasses();
520 }
521 
522 } // namespace bolt
523 } // namespace llvm
524