xref: /llvm-project/bolt/lib/Rewrite/BinaryPassManager.cpp (revision 3c357a49d61e4c81a1ac016502ee504521bc8dda)
1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/ContinuityStats.h"
16 #include "bolt/Passes/FixRISCVCallsPass.h"
17 #include "bolt/Passes/FixRelaxationPass.h"
18 #include "bolt/Passes/FrameOptimizer.h"
19 #include "bolt/Passes/Hugify.h"
20 #include "bolt/Passes/IdenticalCodeFolding.h"
21 #include "bolt/Passes/IndirectCallPromotion.h"
22 #include "bolt/Passes/Inliner.h"
23 #include "bolt/Passes/Instrumentation.h"
24 #include "bolt/Passes/JTFootprintReduction.h"
25 #include "bolt/Passes/LongJmp.h"
26 #include "bolt/Passes/LoopInversionPass.h"
27 #include "bolt/Passes/MCF.h"
28 #include "bolt/Passes/PLTCall.h"
29 #include "bolt/Passes/PatchEntries.h"
30 #include "bolt/Passes/RegReAssign.h"
31 #include "bolt/Passes/ReorderData.h"
32 #include "bolt/Passes/ReorderFunctions.h"
33 #include "bolt/Passes/RetpolineInsertion.h"
34 #include "bolt/Passes/SplitFunctions.h"
35 #include "bolt/Passes/StokeInfo.h"
36 #include "bolt/Passes/TailDuplication.h"
37 #include "bolt/Passes/ThreeWayBranch.h"
38 #include "bolt/Passes/ValidateInternalCalls.h"
39 #include "bolt/Passes/ValidateMemRefs.h"
40 #include "bolt/Passes/VeneerElimination.h"
41 #include "bolt/Utils/CommandLineOpts.h"
42 #include "llvm/Support/FormatVariadic.h"
43 #include "llvm/Support/Timer.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include <memory>
46 #include <numeric>
47 
48 using namespace llvm;
49 
50 namespace opts {
51 
52 extern cl::opt<bool> PrintAll;
53 extern cl::opt<bool> PrintDynoStats;
54 extern cl::opt<bool> DumpDotAll;
55 extern cl::opt<std::string> AsmDump;
56 extern cl::opt<bolt::PLTCall::OptType> PLT;
57 extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
58                llvm::bolt::DeprecatedICFNumericOptionParser>
59     ICF;
60 
61 static cl::opt<bool>
62 DynoStatsAll("dyno-stats-all",
63   cl::desc("print dyno stats after each stage"),
64   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
65 
66 static cl::opt<bool>
67     EliminateUnreachable("eliminate-unreachable",
68                          cl::desc("eliminate unreachable code"), cl::init(true),
69                          cl::cat(BoltOptCategory));
70 
71 static cl::opt<bool> JTFootprintReductionFlag(
72     "jt-footprint-reduction",
73     cl::desc("make jump tables size smaller at the cost of using more "
74              "instructions at jump sites"),
75     cl::cat(BoltOptCategory));
76 
77 cl::opt<bool>
78     KeepNops("keep-nops",
79              cl::desc("keep no-op instructions. By default they are removed."),
80              cl::Hidden, cl::cat(BoltOptCategory));
81 
82 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
83                          cl::ReallyHidden, cl::cat(BoltOptCategory));
84 
85 cl::opt<bool>
86 PrintAfterBranchFixup("print-after-branch-fixup",
87   cl::desc("print function after fixing local branches"),
88   cl::Hidden, cl::cat(BoltOptCategory));
89 
90 static cl::opt<bool>
91 PrintAfterLowering("print-after-lowering",
92   cl::desc("print function after instruction lowering"),
93   cl::Hidden, cl::cat(BoltOptCategory));
94 
95 static cl::opt<bool> PrintEstimateEdgeCounts(
96     "print-estimate-edge-counts",
97     cl::desc("print function after edge counts are set for no-LBR profile"),
98     cl::Hidden, cl::cat(BoltOptCategory));
99 
100 cl::opt<bool>
101 PrintFinalized("print-finalized",
102   cl::desc("print function after CFG is finalized"),
103   cl::Hidden, cl::cat(BoltOptCategory));
104 
105 static cl::opt<bool>
106     PrintFOP("print-fop",
107              cl::desc("print functions after frame optimizer pass"), cl::Hidden,
108              cl::cat(BoltOptCategory));
109 
110 static cl::opt<bool>
111     PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
112              cl::Hidden, cl::cat(BoltOptCategory));
113 
114 static cl::opt<bool>
115     PrintICP("print-icp",
116              cl::desc("print functions after indirect call promotion"),
117              cl::Hidden, cl::cat(BoltOptCategory));
118 
119 static cl::opt<bool>
120     PrintInline("print-inline",
121                 cl::desc("print functions after inlining optimization"),
122                 cl::Hidden, cl::cat(BoltOptCategory));
123 
124 static cl::opt<bool> PrintJTFootprintReduction(
125     "print-after-jt-footprint-reduction",
126     cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
127     cl::cat(BoltOptCategory));
128 
129 static cl::opt<bool>
130     PrintAdrRelaxation("print-adr-relaxation",
131                        cl::desc("print functions after ADR Relaxation pass"),
132                        cl::Hidden, cl::cat(BoltOptCategory));
133 
134 static cl::opt<bool>
135     PrintLongJmp("print-longjmp",
136                  cl::desc("print functions after longjmp pass"), cl::Hidden,
137                  cl::cat(BoltOptCategory));
138 
139 cl::opt<bool>
140     PrintNormalized("print-normalized",
141                     cl::desc("print functions after CFG is normalized"),
142                     cl::Hidden, cl::cat(BoltCategory));
143 
144 static cl::opt<bool> PrintOptimizeBodyless(
145     "print-optimize-bodyless",
146     cl::desc("print functions after bodyless optimization"), cl::Hidden,
147     cl::cat(BoltOptCategory));
148 
149 static cl::opt<bool>
150     PrintPeepholes("print-peepholes",
151                    cl::desc("print functions after peephole optimization"),
152                    cl::Hidden, cl::cat(BoltOptCategory));
153 
154 static cl::opt<bool>
155     PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
156              cl::Hidden, cl::cat(BoltOptCategory));
157 
158 static cl::opt<bool>
159     PrintProfileStats("print-profile-stats",
160                       cl::desc("print profile quality/bias analysis"),
161                       cl::cat(BoltCategory));
162 
163 static cl::opt<bool>
164     PrintRegReAssign("print-regreassign",
165                      cl::desc("print functions after regreassign pass"),
166                      cl::Hidden, cl::cat(BoltOptCategory));
167 
168 cl::opt<bool>
169     PrintReordered("print-reordered",
170                    cl::desc("print functions after layout optimization"),
171                    cl::Hidden, cl::cat(BoltOptCategory));
172 
173 static cl::opt<bool>
174     PrintReorderedFunctions("print-reordered-functions",
175                             cl::desc("print functions after clustering"),
176                             cl::Hidden, cl::cat(BoltOptCategory));
177 
178 static cl::opt<bool> PrintRetpolineInsertion(
179     "print-retpoline-insertion",
180     cl::desc("print functions after retpoline insertion pass"), cl::Hidden,
181     cl::cat(BoltCategory));
182 
183 static cl::opt<bool> PrintSCTC(
184     "print-sctc",
185     cl::desc("print functions after conditional tail call simplification"),
186     cl::Hidden, cl::cat(BoltOptCategory));
187 
188 static cl::opt<bool> PrintSimplifyROLoads(
189     "print-simplify-rodata-loads",
190     cl::desc("print functions after simplification of RO data loads"),
191     cl::Hidden, cl::cat(BoltOptCategory));
192 
193 static cl::opt<bool>
194     PrintSplit("print-split", cl::desc("print functions after code splitting"),
195                cl::Hidden, cl::cat(BoltOptCategory));
196 
197 static cl::opt<bool>
198     PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
199                cl::Hidden, cl::cat(BoltOptCategory));
200 
201 static cl::opt<bool>
202     PrintFixRelaxations("print-fix-relaxations",
203                         cl::desc("print functions after fix relaxations pass"),
204                         cl::Hidden, cl::cat(BoltOptCategory));
205 
206 static cl::opt<bool>
207     PrintFixRISCVCalls("print-fix-riscv-calls",
208                        cl::desc("print functions after fix RISCV calls pass"),
209                        cl::Hidden, cl::cat(BoltOptCategory));
210 
211 static cl::opt<bool> PrintVeneerElimination(
212     "print-veneer-elimination",
213     cl::desc("print functions after veneer elimination pass"), cl::Hidden,
214     cl::cat(BoltOptCategory));
215 
216 static cl::opt<bool>
217     PrintUCE("print-uce",
218              cl::desc("print functions after unreachable code elimination"),
219              cl::Hidden, cl::cat(BoltOptCategory));
220 
221 static cl::opt<bool> RegReAssign(
222     "reg-reassign",
223     cl::desc(
224         "reassign registers so as to avoid using REX prefixes in hot code"),
225     cl::cat(BoltOptCategory));
226 
227 static cl::opt<bool> SimplifyConditionalTailCalls(
228     "simplify-conditional-tail-calls",
229     cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
230     cl::init(true), cl::cat(BoltOptCategory));
231 
232 static cl::opt<bool> SimplifyRODataLoads(
233     "simplify-rodata-loads",
234     cl::desc("simplify loads from read-only sections by replacing the memory "
235              "operand with the constant found in the corresponding section"),
236     cl::cat(BoltOptCategory));
237 
238 static cl::list<std::string>
239 SpecializeMemcpy1("memcpy1-spec",
240   cl::desc("list of functions with call sites for which to specialize memcpy() "
241            "for size 1"),
242   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
243   cl::ZeroOrMore, cl::cat(BoltOptCategory));
244 
245 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
246                            cl::cat(BoltOptCategory));
247 
248 static cl::opt<bool> StringOps(
249     "inline-memcpy",
250     cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
251     cl::cat(BoltOptCategory));
252 
253 static cl::opt<bool> StripRepRet(
254     "strip-rep-ret",
255     cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
256     cl::init(true), cl::cat(BoltOptCategory));
257 
258 static cl::opt<bool> VerifyCFG("verify-cfg",
259                                cl::desc("verify the CFG after every pass"),
260                                cl::Hidden, cl::cat(BoltOptCategory));
261 
262 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
263                                         cl::desc("reorder three way branches"),
264                                         cl::ReallyHidden,
265                                         cl::cat(BoltOptCategory));
266 
267 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
268                                         cl::desc("fold jcc+mov into cmov"),
269                                         cl::ReallyHidden,
270                                         cl::cat(BoltOptCategory));
271 
272 static cl::opt<bool> ShortenInstructions("shorten-instructions",
273                                          cl::desc("shorten instructions"),
274                                          cl::init(true),
275                                          cl::cat(BoltOptCategory));
276 } // namespace opts
277 
278 namespace llvm {
279 namespace bolt {
280 
281 using namespace opts;
282 
283 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
284 const char BinaryFunctionPassManager::TimerGroupDesc[] =
285     "Binary Function Pass Manager";
286 
287 Error BinaryFunctionPassManager::runPasses() {
288   auto &BFs = BC.getBinaryFunctions();
289   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
290     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
291         &OptPassPair = Passes[PassIdx];
292     if (!OptPassPair.first)
293       continue;
294 
295     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
296     std::string PassIdName =
297         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
298 
299     if (opts::Verbosity > 0)
300       BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
301 
302     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
303                        TimerGroupDesc, TimeOpts);
304 
305     Error E = Error::success();
306     callWithDynoStats(
307         BC.outs(),
308         [this, &E, &Pass] {
309           E = joinErrors(std::move(E), Pass->runOnFunctions(BC));
310         },
311         BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
312     if (E)
313       return Error(std::move(E));
314 
315     if (opts::VerifyCFG &&
316         !std::accumulate(
317             BFs.begin(), BFs.end(), true,
318             [](const bool Valid,
319                const std::pair<const uint64_t, BinaryFunction> &It) {
320               return Valid && It.second.validateCFG();
321             })) {
322       return createFatalBOLTError(
323           Twine("BOLT-ERROR: Invalid CFG detected after pass ") +
324           Twine(Pass->getName()) + Twine("\n"));
325     }
326 
327     if (opts::Verbosity > 0)
328       BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
329 
330     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
331       continue;
332 
333     const std::string Message = std::string("after ") + Pass->getName();
334 
335     for (auto &It : BFs) {
336       BinaryFunction &Function = It.second;
337 
338       if (!Pass->shouldPrint(Function))
339         continue;
340 
341       Function.print(BC.outs(), Message);
342 
343       if (opts::DumpDotAll)
344         Function.dumpGraphForPass(PassIdName);
345     }
346   }
347   return Error::success();
348 }
349 
350 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
351   BinaryFunctionPassManager Manager(BC);
352 
353   Manager.registerPass(
354       std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));
355 
356   Manager.registerPass(std::make_unique<DynoStatsSetPass>());
357 
358   Manager.registerPass(std::make_unique<AsmDumpPass>(),
359                        opts::AsmDump.getNumOccurrences());
360 
361   if (BC.isAArch64()) {
362     Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));
363 
364     Manager.registerPass(
365         std::make_unique<VeneerElimination>(PrintVeneerElimination));
366   }
367 
368   if (BC.isRISCV()) {
369     Manager.registerPass(
370         std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls));
371   }
372 
373   // Here we manage dependencies/order manually, since passes are run in the
374   // order they're registered.
375 
376   // Run this pass first to use stats for the original functions.
377   Manager.registerPass(std::make_unique<PrintProgramStats>());
378 
379   if (opts::PrintProfileStats)
380     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
381 
382   Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));
383 
384   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
385 
386   Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
387 
388   if (opts::Instrument)
389     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
390   else if (opts::Hugify)
391     Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
392 
393   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint),
394                        opts::ShortenInstructions);
395 
396   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
397                        !opts::KeepNops);
398 
399   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
400 
401   if (BC.isX86())
402     Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
403                          opts::StripRepRet);
404 
405   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
406                        opts::ICF != IdenticalCodeFolding::ICFLevel::None);
407 
408   Manager.registerPass(
409       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
410       !opts::SpecializeMemcpy1.empty());
411 
412   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
413                        opts::StringOps);
414 
415   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
416 
417   Manager.registerPass(
418       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
419       opts::JTFootprintReductionFlag);
420 
421   Manager.registerPass(
422       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
423       opts::SimplifyRODataLoads);
424 
425   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
426                        opts::RegReAssign);
427 
428   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
429 
430   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
431                        opts::ICF != IdenticalCodeFolding::ICFLevel::None);
432 
433   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
434 
435   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
436                        opts::ThreeWayBranchFlag);
437 
438   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
439 
440   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
441                        opts::EliminateUnreachable);
442 
443   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
444 
445   Manager.registerPass(std::make_unique<LoopInversionPass>());
446 
447   Manager.registerPass(std::make_unique<TailDuplication>());
448 
449   Manager.registerPass(std::make_unique<CMOVConversion>(),
450                        opts::CMOVConversionFlag);
451 
452   // This pass syncs local branches with CFG. If any of the following
453   // passes breaks the sync - they either need to re-run the pass or
454   // fix branches consistency internally.
455   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
456 
457   // This pass should come close to last since it uses the estimated hot
458   // size of a function to determine the order.  It should definitely
459   // also happen after any changes to the call graph are made, e.g. inlining.
460   Manager.registerPass(
461       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
462 
463   // This is the second run of the SplitFunctions pass required by certain
464   // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
465   // after ReorderFunctions allows the finalized function order to be utilized
466   // to make more sophisticated splitting decisions, like hot-warm-cold
467   // splitting.
468   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
469 
470   // Print final dyno stats right while CFG and instruction analysis are intact.
471   Manager.registerPass(std::make_unique<DynoStatsPrintPass>(
472                            "after all optimizations before SCTC and FOP"),
473                        opts::PrintDynoStats || opts::DynoStatsAll);
474 
475   // Add the StokeInfo pass, which extract functions for stoke optimization and
476   // get the liveness information for them
477   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
478 
479   // This pass introduces conditional jumps into external functions.
480   // Between extending CFG to support this and isolating this pass we chose
481   // the latter. Thus this pass will do double jump removal and unreachable
482   // code elimination if necessary and won't rely on peepholes/UCE for these
483   // optimizations.
484   // More generally this pass should be the last optimization pass that
485   // modifies branches/control flow.  This pass is run after function
486   // reordering so that it can tell whether calls are forward/backward
487   // accurately.
488   Manager.registerPass(
489       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
490       opts::SimplifyConditionalTailCalls);
491 
492   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
493 
494   Manager.registerPass(std::make_unique<AlignerPass>());
495 
496   // Perform reordering on data contained in one or more sections using
497   // memory profiling data.
498   Manager.registerPass(std::make_unique<ReorderData>());
499 
500   if (BC.isAArch64()) {
501     Manager.registerPass(
502         std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation));
503 
504     // Tighten branches according to offset differences between branch and
505     // targets. No extra instructions after this pass, otherwise we may have
506     // relocations out of range and crash during linking.
507     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
508   }
509 
510   // This pass should always run last.*
511   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
512 
513   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
514   // FrameOptimizer move values around and needs to update CFIs. To do this, it
515   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
516   // placed according to the final layout.
517   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
518 
519   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
520 
521   Manager.registerPass(
522       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
523 
524   // Assign each function an output section.
525   Manager.registerPass(std::make_unique<AssignSections>());
526 
527   // Patch original function entries
528   if (BC.HasRelocations)
529     Manager.registerPass(std::make_unique<PatchEntries>());
530 
531   // This pass turns tail calls into jumps which makes them invisible to
532   // function reordering. It's unsafe to use any CFG or instruction analysis
533   // after this point.
534   Manager.registerPass(
535       std::make_unique<InstructionLowering>(PrintAfterLowering));
536 
537   // In non-relocation mode, mark functions that do not fit into their original
538   // space as non-simple if we have to (e.g. for correct debug info update).
539   // NOTE: this pass depends on finalized code.
540   if (!BC.HasRelocations)
541     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
542 
543   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
544 
545   // Check for dirty state of MCSymbols caused by running calculateEmittedSize
546   // in parallel and restore them
547   Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint));
548 
549   return Manager.runPasses();
550 }
551 
552 } // namespace bolt
553 } // namespace llvm
554