xref: /llvm-project/bolt/lib/Rewrite/BinaryPassManager.cpp (revision 5fb59e744783cf686e6a355c8331eeab90678c00)
1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FixRISCVCallsPass.h"
16 #include "bolt/Passes/FixRelaxationPass.h"
17 #include "bolt/Passes/FrameOptimizer.h"
18 #include "bolt/Passes/Hugify.h"
19 #include "bolt/Passes/IdenticalCodeFolding.h"
20 #include "bolt/Passes/IndirectCallPromotion.h"
21 #include "bolt/Passes/Inliner.h"
22 #include "bolt/Passes/Instrumentation.h"
23 #include "bolt/Passes/JTFootprintReduction.h"
24 #include "bolt/Passes/LongJmp.h"
25 #include "bolt/Passes/LoopInversionPass.h"
26 #include "bolt/Passes/PLTCall.h"
27 #include "bolt/Passes/PatchEntries.h"
28 #include "bolt/Passes/RegReAssign.h"
29 #include "bolt/Passes/ReorderData.h"
30 #include "bolt/Passes/ReorderFunctions.h"
31 #include "bolt/Passes/RetpolineInsertion.h"
32 #include "bolt/Passes/SplitFunctions.h"
33 #include "bolt/Passes/StokeInfo.h"
34 #include "bolt/Passes/TailDuplication.h"
35 #include "bolt/Passes/ThreeWayBranch.h"
36 #include "bolt/Passes/ValidateInternalCalls.h"
37 #include "bolt/Passes/ValidateMemRefs.h"
38 #include "bolt/Passes/VeneerElimination.h"
39 #include "bolt/Utils/CommandLineOpts.h"
40 #include "llvm/Support/FormatVariadic.h"
41 #include "llvm/Support/Timer.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <memory>
44 #include <numeric>
45 
46 using namespace llvm;
47 
48 namespace opts {
49 
50 extern cl::opt<bool> PrintAll;
51 extern cl::opt<bool> PrintDynoStats;
52 extern cl::opt<bool> DumpDotAll;
53 extern cl::opt<std::string> AsmDump;
54 extern cl::opt<bolt::PLTCall::OptType> PLT;
55 
56 static cl::opt<bool>
57 DynoStatsAll("dyno-stats-all",
58   cl::desc("print dyno stats after each stage"),
59   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
60 
61 static cl::opt<bool>
62     EliminateUnreachable("eliminate-unreachable",
63                          cl::desc("eliminate unreachable code"), cl::init(true),
64                          cl::cat(BoltOptCategory));
65 
66 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
67                   cl::cat(BoltOptCategory));
68 
69 static cl::opt<bool> JTFootprintReductionFlag(
70     "jt-footprint-reduction",
71     cl::desc("make jump tables size smaller at the cost of using more "
72              "instructions at jump sites"),
73     cl::cat(BoltOptCategory));
74 
75 cl::opt<bool>
76     KeepNops("keep-nops",
77              cl::desc("keep no-op instructions. By default they are removed."),
78              cl::Hidden, cl::cat(BoltOptCategory));
79 
80 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
81                          cl::ReallyHidden, cl::cat(BoltOptCategory));
82 
83 cl::opt<bool>
84 PrintAfterBranchFixup("print-after-branch-fixup",
85   cl::desc("print function after fixing local branches"),
86   cl::Hidden, cl::cat(BoltOptCategory));
87 
88 static cl::opt<bool>
89 PrintAfterLowering("print-after-lowering",
90   cl::desc("print function after instruction lowering"),
91   cl::Hidden, cl::cat(BoltOptCategory));
92 
93 cl::opt<bool>
94 PrintFinalized("print-finalized",
95   cl::desc("print function after CFG is finalized"),
96   cl::Hidden, cl::cat(BoltOptCategory));
97 
98 static cl::opt<bool>
99     PrintFOP("print-fop",
100              cl::desc("print functions after frame optimizer pass"), cl::Hidden,
101              cl::cat(BoltOptCategory));
102 
103 static cl::opt<bool>
104     PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
105              cl::Hidden, cl::cat(BoltOptCategory));
106 
107 static cl::opt<bool>
108     PrintICP("print-icp",
109              cl::desc("print functions after indirect call promotion"),
110              cl::Hidden, cl::cat(BoltOptCategory));
111 
112 static cl::opt<bool>
113     PrintInline("print-inline",
114                 cl::desc("print functions after inlining optimization"),
115                 cl::Hidden, cl::cat(BoltOptCategory));
116 
117 static cl::opt<bool> PrintJTFootprintReduction(
118     "print-after-jt-footprint-reduction",
119     cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
120     cl::cat(BoltOptCategory));
121 
122 static cl::opt<bool>
123     PrintLongJmp("print-longjmp",
124                  cl::desc("print functions after longjmp pass"), cl::Hidden,
125                  cl::cat(BoltOptCategory));
126 
127 cl::opt<bool>
128     PrintNormalized("print-normalized",
129                     cl::desc("print functions after CFG is normalized"),
130                     cl::Hidden, cl::cat(BoltCategory));
131 
132 static cl::opt<bool> PrintOptimizeBodyless(
133     "print-optimize-bodyless",
134     cl::desc("print functions after bodyless optimization"), cl::Hidden,
135     cl::cat(BoltOptCategory));
136 
137 static cl::opt<bool>
138     PrintPeepholes("print-peepholes",
139                    cl::desc("print functions after peephole optimization"),
140                    cl::Hidden, cl::cat(BoltOptCategory));
141 
142 static cl::opt<bool>
143     PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
144              cl::Hidden, cl::cat(BoltOptCategory));
145 
146 static cl::opt<bool>
147     PrintProfileStats("print-profile-stats",
148                       cl::desc("print profile quality/bias analysis"),
149                       cl::cat(BoltCategory));
150 
151 static cl::opt<bool>
152     PrintRegReAssign("print-regreassign",
153                      cl::desc("print functions after regreassign pass"),
154                      cl::Hidden, cl::cat(BoltOptCategory));
155 
156 cl::opt<bool>
157     PrintReordered("print-reordered",
158                    cl::desc("print functions after layout optimization"),
159                    cl::Hidden, cl::cat(BoltOptCategory));
160 
161 static cl::opt<bool>
162     PrintReorderedFunctions("print-reordered-functions",
163                             cl::desc("print functions after clustering"),
164                             cl::Hidden, cl::cat(BoltOptCategory));
165 
166 static cl::opt<bool> PrintRetpolineInsertion(
167     "print-retpoline-insertion",
168     cl::desc("print functions after retpoline insertion pass"), cl::Hidden,
169     cl::cat(BoltCategory));
170 
171 static cl::opt<bool> PrintSCTC(
172     "print-sctc",
173     cl::desc("print functions after conditional tail call simplification"),
174     cl::Hidden, cl::cat(BoltOptCategory));
175 
176 static cl::opt<bool> PrintSimplifyROLoads(
177     "print-simplify-rodata-loads",
178     cl::desc("print functions after simplification of RO data loads"),
179     cl::Hidden, cl::cat(BoltOptCategory));
180 
181 static cl::opt<bool>
182     PrintSplit("print-split", cl::desc("print functions after code splitting"),
183                cl::Hidden, cl::cat(BoltOptCategory));
184 
185 static cl::opt<bool>
186     PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
187                cl::Hidden, cl::cat(BoltOptCategory));
188 
189 static cl::opt<bool>
190     PrintFixRelaxations("print-fix-relaxations",
191                         cl::desc("print functions after fix relaxations pass"),
192                         cl::Hidden, cl::cat(BoltOptCategory));
193 
194 static cl::opt<bool>
195     PrintFixRISCVCalls("print-fix-riscv-calls",
196                        cl::desc("print functions after fix RISCV calls pass"),
197                        cl::Hidden, cl::cat(BoltOptCategory));
198 
199 static cl::opt<bool> PrintVeneerElimination(
200     "print-veneer-elimination",
201     cl::desc("print functions after veneer elimination pass"), cl::Hidden,
202     cl::cat(BoltOptCategory));
203 
204 static cl::opt<bool>
205     PrintUCE("print-uce",
206              cl::desc("print functions after unreachable code elimination"),
207              cl::Hidden, cl::cat(BoltOptCategory));
208 
209 static cl::opt<bool> RegReAssign(
210     "reg-reassign",
211     cl::desc(
212         "reassign registers so as to avoid using REX prefixes in hot code"),
213     cl::cat(BoltOptCategory));
214 
215 static cl::opt<bool> SimplifyConditionalTailCalls(
216     "simplify-conditional-tail-calls",
217     cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
218     cl::init(true), cl::cat(BoltOptCategory));
219 
220 static cl::opt<bool> SimplifyRODataLoads(
221     "simplify-rodata-loads",
222     cl::desc("simplify loads from read-only sections by replacing the memory "
223              "operand with the constant found in the corresponding section"),
224     cl::cat(BoltOptCategory));
225 
226 static cl::list<std::string>
227 SpecializeMemcpy1("memcpy1-spec",
228   cl::desc("list of functions with call sites for which to specialize memcpy() "
229            "for size 1"),
230   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
231   cl::ZeroOrMore, cl::cat(BoltOptCategory));
232 
233 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
234                            cl::cat(BoltOptCategory));
235 
236 static cl::opt<bool> StringOps(
237     "inline-memcpy",
238     cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
239     cl::cat(BoltOptCategory));
240 
241 static cl::opt<bool> StripRepRet(
242     "strip-rep-ret",
243     cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
244     cl::init(true), cl::cat(BoltOptCategory));
245 
246 static cl::opt<bool> VerifyCFG("verify-cfg",
247                                cl::desc("verify the CFG after every pass"),
248                                cl::Hidden, cl::cat(BoltOptCategory));
249 
250 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
251                                         cl::desc("reorder three way branches"),
252                                         cl::ReallyHidden,
253                                         cl::cat(BoltOptCategory));
254 
255 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
256                                         cl::desc("fold jcc+mov into cmov"),
257                                         cl::ReallyHidden,
258                                         cl::cat(BoltOptCategory));
259 
260 } // namespace opts
261 
262 namespace llvm {
263 namespace bolt {
264 
265 using namespace opts;
266 
267 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
268 const char BinaryFunctionPassManager::TimerGroupDesc[] =
269     "Binary Function Pass Manager";
270 
271 Error BinaryFunctionPassManager::runPasses() {
272   auto &BFs = BC.getBinaryFunctions();
273   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
274     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
275         &OptPassPair = Passes[PassIdx];
276     if (!OptPassPair.first)
277       continue;
278 
279     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
280     std::string PassIdName =
281         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
282 
283     if (opts::Verbosity > 0)
284       BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
285 
286     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
287                        TimerGroupDesc, TimeOpts);
288 
289     Error E = Error::success();
290     callWithDynoStats(
291         BC.outs(),
292         [this, &E, &Pass] {
293           E = joinErrors(std::move(E), Pass->runOnFunctions(BC));
294         },
295         BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
296     if (E)
297       return Error(std::move(E));
298 
299     if (opts::VerifyCFG &&
300         !std::accumulate(
301             BFs.begin(), BFs.end(), true,
302             [](const bool Valid,
303                const std::pair<const uint64_t, BinaryFunction> &It) {
304               return Valid && It.second.validateCFG();
305             })) {
306       return createFatalBOLTError(
307           Twine("BOLT-ERROR: Invalid CFG detected after pass ") +
308           Twine(Pass->getName()) + Twine("\n"));
309     }
310 
311     if (opts::Verbosity > 0)
312       BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
313 
314     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
315       continue;
316 
317     const std::string Message = std::string("after ") + Pass->getName();
318 
319     for (auto &It : BFs) {
320       BinaryFunction &Function = It.second;
321 
322       if (!Pass->shouldPrint(Function))
323         continue;
324 
325       Function.print(BC.outs(), Message);
326 
327       if (opts::DumpDotAll)
328         Function.dumpGraphForPass(PassIdName);
329     }
330   }
331   return Error::success();
332 }
333 
334 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
335   BinaryFunctionPassManager Manager(BC);
336 
337   const DynoStats InitialDynoStats =
338       getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
339 
340   Manager.registerPass(std::make_unique<AsmDumpPass>(),
341                        opts::AsmDump.getNumOccurrences());
342 
343   if (BC.isAArch64()) {
344     Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));
345 
346     Manager.registerPass(
347         std::make_unique<VeneerElimination>(PrintVeneerElimination));
348   }
349 
350   if (BC.isRISCV()) {
351     Manager.registerPass(
352         std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls));
353   }
354 
355   // Here we manage dependencies/order manually, since passes are run in the
356   // order they're registered.
357 
358   // Run this pass first to use stats for the original functions.
359   Manager.registerPass(std::make_unique<PrintProgramStats>());
360 
361   if (opts::PrintProfileStats)
362     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
363 
364   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
365 
366   Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
367 
368   if (opts::Instrument)
369     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
370   else if (opts::Hugify)
371     Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
372 
373   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
374 
375   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
376                        !opts::KeepNops);
377 
378   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
379 
380   if (BC.isX86())
381     Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
382                          opts::StripRepRet);
383 
384   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
385                        opts::ICF);
386 
387   Manager.registerPass(
388       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
389       !opts::SpecializeMemcpy1.empty());
390 
391   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
392                        opts::StringOps);
393 
394   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
395 
396   Manager.registerPass(
397       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
398       opts::JTFootprintReductionFlag);
399 
400   Manager.registerPass(
401       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
402       opts::SimplifyRODataLoads);
403 
404   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
405                        opts::RegReAssign);
406 
407   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
408 
409   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
410                        opts::ICF);
411 
412   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
413 
414   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
415                        opts::ThreeWayBranchFlag);
416 
417   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
418 
419   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
420                        opts::EliminateUnreachable);
421 
422   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
423 
424   Manager.registerPass(std::make_unique<LoopInversionPass>());
425 
426   Manager.registerPass(std::make_unique<TailDuplication>());
427 
428   Manager.registerPass(std::make_unique<CMOVConversion>(),
429                        opts::CMOVConversionFlag);
430 
431   // This pass syncs local branches with CFG. If any of the following
432   // passes breaks the sync - they either need to re-run the pass or
433   // fix branches consistency internally.
434   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
435 
436   // This pass should come close to last since it uses the estimated hot
437   // size of a function to determine the order.  It should definitely
438   // also happen after any changes to the call graph are made, e.g. inlining.
439   Manager.registerPass(
440       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
441 
442   // This is the second run of the SplitFunctions pass required by certain
443   // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
444   // after ReorderFunctions allows the finalized function order to be utilized
445   // to make more sophisticated splitting decisions, like hot-warm-cold
446   // splitting.
447   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
448 
449   // Print final dyno stats right while CFG and instruction analysis are intact.
450   Manager.registerPass(
451       std::make_unique<DynoStatsPrintPass>(
452           InitialDynoStats, "after all optimizations before SCTC and FOP"),
453       opts::PrintDynoStats || opts::DynoStatsAll);
454 
455   // Add the StokeInfo pass, which extract functions for stoke optimization and
456   // get the liveness information for them
457   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
458 
459   // This pass introduces conditional jumps into external functions.
460   // Between extending CFG to support this and isolating this pass we chose
461   // the latter. Thus this pass will do double jump removal and unreachable
462   // code elimination if necessary and won't rely on peepholes/UCE for these
463   // optimizations.
464   // More generally this pass should be the last optimization pass that
465   // modifies branches/control flow.  This pass is run after function
466   // reordering so that it can tell whether calls are forward/backward
467   // accurately.
468   Manager.registerPass(
469       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
470       opts::SimplifyConditionalTailCalls);
471 
472   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
473 
474   Manager.registerPass(std::make_unique<AlignerPass>());
475 
476   // Perform reordering on data contained in one or more sections using
477   // memory profiling data.
478   Manager.registerPass(std::make_unique<ReorderData>());
479 
480   if (BC.isAArch64()) {
481     Manager.registerPass(std::make_unique<ADRRelaxationPass>());
482 
483     // Tighten branches according to offset differences between branch and
484     // targets. No extra instructions after this pass, otherwise we may have
485     // relocations out of range and crash during linking.
486     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
487   }
488 
489   // This pass should always run last.*
490   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
491 
492   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
493   // FrameOptimizer move values around and needs to update CFIs. To do this, it
494   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
495   // placed according to the final layout.
496   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
497 
498   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
499 
500   Manager.registerPass(
501       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
502 
503   // Assign each function an output section.
504   Manager.registerPass(std::make_unique<AssignSections>());
505 
506   // Patch original function entries
507   if (BC.HasRelocations)
508     Manager.registerPass(std::make_unique<PatchEntries>());
509 
510   // This pass turns tail calls into jumps which makes them invisible to
511   // function reordering. It's unsafe to use any CFG or instruction analysis
512   // after this point.
513   Manager.registerPass(
514       std::make_unique<InstructionLowering>(PrintAfterLowering));
515 
516   // In non-relocation mode, mark functions that do not fit into their original
517   // space as non-simple if we have to (e.g. for correct debug info update).
518   // NOTE: this pass depends on finalized code.
519   if (!BC.HasRelocations)
520     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
521 
522   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
523 
524   // Check for dirty state of MCSymbols caused by running calculateEmittedSize
525   // in parallel and restore them
526   Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint));
527 
528   return Manager.runPasses();
529 }
530 
531 } // namespace bolt
532 } // namespace llvm
533