xref: /llvm-project/bolt/lib/Rewrite/BinaryPassManager.cpp (revision f3dc732b3623c86164f9d95f7563f982cecc5558)
1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FixRISCVCallsPass.h"
16 #include "bolt/Passes/FixRelaxationPass.h"
17 #include "bolt/Passes/FrameOptimizer.h"
18 #include "bolt/Passes/Hugify.h"
19 #include "bolt/Passes/IdenticalCodeFolding.h"
20 #include "bolt/Passes/IndirectCallPromotion.h"
21 #include "bolt/Passes/Inliner.h"
22 #include "bolt/Passes/Instrumentation.h"
23 #include "bolt/Passes/JTFootprintReduction.h"
24 #include "bolt/Passes/LongJmp.h"
25 #include "bolt/Passes/LoopInversionPass.h"
26 #include "bolt/Passes/MCF.h"
27 #include "bolt/Passes/PLTCall.h"
28 #include "bolt/Passes/PatchEntries.h"
29 #include "bolt/Passes/RegReAssign.h"
30 #include "bolt/Passes/ReorderData.h"
31 #include "bolt/Passes/ReorderFunctions.h"
32 #include "bolt/Passes/RetpolineInsertion.h"
33 #include "bolt/Passes/SplitFunctions.h"
34 #include "bolt/Passes/StokeInfo.h"
35 #include "bolt/Passes/TailDuplication.h"
36 #include "bolt/Passes/ThreeWayBranch.h"
37 #include "bolt/Passes/ValidateInternalCalls.h"
38 #include "bolt/Passes/ValidateMemRefs.h"
39 #include "bolt/Passes/VeneerElimination.h"
40 #include "bolt/Utils/CommandLineOpts.h"
41 #include "llvm/Support/FormatVariadic.h"
42 #include "llvm/Support/Timer.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include <memory>
45 #include <numeric>
46 
47 using namespace llvm;
48 
49 namespace opts {
50 
51 extern cl::opt<bool> PrintAll;
52 extern cl::opt<bool> PrintDynoStats;
53 extern cl::opt<bool> DumpDotAll;
54 extern cl::opt<std::string> AsmDump;
55 extern cl::opt<bolt::PLTCall::OptType> PLT;
56 
57 static cl::opt<bool>
58 DynoStatsAll("dyno-stats-all",
59   cl::desc("print dyno stats after each stage"),
60   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
61 
62 static cl::opt<bool>
63     EliminateUnreachable("eliminate-unreachable",
64                          cl::desc("eliminate unreachable code"), cl::init(true),
65                          cl::cat(BoltOptCategory));
66 
67 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
68                   cl::cat(BoltOptCategory));
69 
70 static cl::opt<bool> JTFootprintReductionFlag(
71     "jt-footprint-reduction",
72     cl::desc("make jump tables size smaller at the cost of using more "
73              "instructions at jump sites"),
74     cl::cat(BoltOptCategory));
75 
76 cl::opt<bool>
77     KeepNops("keep-nops",
78              cl::desc("keep no-op instructions. By default they are removed."),
79              cl::Hidden, cl::cat(BoltOptCategory));
80 
81 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
82                          cl::ReallyHidden, cl::cat(BoltOptCategory));
83 
84 cl::opt<bool>
85 PrintAfterBranchFixup("print-after-branch-fixup",
86   cl::desc("print function after fixing local branches"),
87   cl::Hidden, cl::cat(BoltOptCategory));
88 
89 static cl::opt<bool>
90 PrintAfterLowering("print-after-lowering",
91   cl::desc("print function after instruction lowering"),
92   cl::Hidden, cl::cat(BoltOptCategory));
93 
94 static cl::opt<bool> PrintEstimateEdgeCounts(
95     "print-estimate-edge-counts",
96     cl::desc("print function after edge counts are set for no-LBR profile"),
97     cl::Hidden, cl::cat(BoltOptCategory));
98 
99 cl::opt<bool>
100 PrintFinalized("print-finalized",
101   cl::desc("print function after CFG is finalized"),
102   cl::Hidden, cl::cat(BoltOptCategory));
103 
104 static cl::opt<bool>
105     PrintFOP("print-fop",
106              cl::desc("print functions after frame optimizer pass"), cl::Hidden,
107              cl::cat(BoltOptCategory));
108 
109 static cl::opt<bool>
110     PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
111              cl::Hidden, cl::cat(BoltOptCategory));
112 
113 static cl::opt<bool>
114     PrintICP("print-icp",
115              cl::desc("print functions after indirect call promotion"),
116              cl::Hidden, cl::cat(BoltOptCategory));
117 
118 static cl::opt<bool>
119     PrintInline("print-inline",
120                 cl::desc("print functions after inlining optimization"),
121                 cl::Hidden, cl::cat(BoltOptCategory));
122 
123 static cl::opt<bool> PrintJTFootprintReduction(
124     "print-after-jt-footprint-reduction",
125     cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
126     cl::cat(BoltOptCategory));
127 
128 static cl::opt<bool>
129     PrintLongJmp("print-longjmp",
130                  cl::desc("print functions after longjmp pass"), cl::Hidden,
131                  cl::cat(BoltOptCategory));
132 
133 cl::opt<bool>
134     PrintNormalized("print-normalized",
135                     cl::desc("print functions after CFG is normalized"),
136                     cl::Hidden, cl::cat(BoltCategory));
137 
138 static cl::opt<bool> PrintOptimizeBodyless(
139     "print-optimize-bodyless",
140     cl::desc("print functions after bodyless optimization"), cl::Hidden,
141     cl::cat(BoltOptCategory));
142 
143 static cl::opt<bool>
144     PrintPeepholes("print-peepholes",
145                    cl::desc("print functions after peephole optimization"),
146                    cl::Hidden, cl::cat(BoltOptCategory));
147 
148 static cl::opt<bool>
149     PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
150              cl::Hidden, cl::cat(BoltOptCategory));
151 
152 static cl::opt<bool>
153     PrintProfileStats("print-profile-stats",
154                       cl::desc("print profile quality/bias analysis"),
155                       cl::cat(BoltCategory));
156 
157 static cl::opt<bool>
158     PrintRegReAssign("print-regreassign",
159                      cl::desc("print functions after regreassign pass"),
160                      cl::Hidden, cl::cat(BoltOptCategory));
161 
162 cl::opt<bool>
163     PrintReordered("print-reordered",
164                    cl::desc("print functions after layout optimization"),
165                    cl::Hidden, cl::cat(BoltOptCategory));
166 
167 static cl::opt<bool>
168     PrintReorderedFunctions("print-reordered-functions",
169                             cl::desc("print functions after clustering"),
170                             cl::Hidden, cl::cat(BoltOptCategory));
171 
172 static cl::opt<bool> PrintRetpolineInsertion(
173     "print-retpoline-insertion",
174     cl::desc("print functions after retpoline insertion pass"), cl::Hidden,
175     cl::cat(BoltCategory));
176 
177 static cl::opt<bool> PrintSCTC(
178     "print-sctc",
179     cl::desc("print functions after conditional tail call simplification"),
180     cl::Hidden, cl::cat(BoltOptCategory));
181 
182 static cl::opt<bool> PrintSimplifyROLoads(
183     "print-simplify-rodata-loads",
184     cl::desc("print functions after simplification of RO data loads"),
185     cl::Hidden, cl::cat(BoltOptCategory));
186 
187 static cl::opt<bool>
188     PrintSplit("print-split", cl::desc("print functions after code splitting"),
189                cl::Hidden, cl::cat(BoltOptCategory));
190 
191 static cl::opt<bool>
192     PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
193                cl::Hidden, cl::cat(BoltOptCategory));
194 
195 static cl::opt<bool>
196     PrintFixRelaxations("print-fix-relaxations",
197                         cl::desc("print functions after fix relaxations pass"),
198                         cl::Hidden, cl::cat(BoltOptCategory));
199 
200 static cl::opt<bool>
201     PrintFixRISCVCalls("print-fix-riscv-calls",
202                        cl::desc("print functions after fix RISCV calls pass"),
203                        cl::Hidden, cl::cat(BoltOptCategory));
204 
205 static cl::opt<bool> PrintVeneerElimination(
206     "print-veneer-elimination",
207     cl::desc("print functions after veneer elimination pass"), cl::Hidden,
208     cl::cat(BoltOptCategory));
209 
210 static cl::opt<bool>
211     PrintUCE("print-uce",
212              cl::desc("print functions after unreachable code elimination"),
213              cl::Hidden, cl::cat(BoltOptCategory));
214 
215 static cl::opt<bool> RegReAssign(
216     "reg-reassign",
217     cl::desc(
218         "reassign registers so as to avoid using REX prefixes in hot code"),
219     cl::cat(BoltOptCategory));
220 
221 static cl::opt<bool> SimplifyConditionalTailCalls(
222     "simplify-conditional-tail-calls",
223     cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
224     cl::init(true), cl::cat(BoltOptCategory));
225 
226 static cl::opt<bool> SimplifyRODataLoads(
227     "simplify-rodata-loads",
228     cl::desc("simplify loads from read-only sections by replacing the memory "
229              "operand with the constant found in the corresponding section"),
230     cl::cat(BoltOptCategory));
231 
232 static cl::list<std::string>
233 SpecializeMemcpy1("memcpy1-spec",
234   cl::desc("list of functions with call sites for which to specialize memcpy() "
235            "for size 1"),
236   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
237   cl::ZeroOrMore, cl::cat(BoltOptCategory));
238 
239 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
240                            cl::cat(BoltOptCategory));
241 
242 static cl::opt<bool> StringOps(
243     "inline-memcpy",
244     cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
245     cl::cat(BoltOptCategory));
246 
247 static cl::opt<bool> StripRepRet(
248     "strip-rep-ret",
249     cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
250     cl::init(true), cl::cat(BoltOptCategory));
251 
252 static cl::opt<bool> VerifyCFG("verify-cfg",
253                                cl::desc("verify the CFG after every pass"),
254                                cl::Hidden, cl::cat(BoltOptCategory));
255 
256 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
257                                         cl::desc("reorder three way branches"),
258                                         cl::ReallyHidden,
259                                         cl::cat(BoltOptCategory));
260 
261 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
262                                         cl::desc("fold jcc+mov into cmov"),
263                                         cl::ReallyHidden,
264                                         cl::cat(BoltOptCategory));
265 
266 } // namespace opts
267 
268 namespace llvm {
269 namespace bolt {
270 
271 using namespace opts;
272 
273 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
274 const char BinaryFunctionPassManager::TimerGroupDesc[] =
275     "Binary Function Pass Manager";
276 
277 Error BinaryFunctionPassManager::runPasses() {
278   auto &BFs = BC.getBinaryFunctions();
279   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
280     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
281         &OptPassPair = Passes[PassIdx];
282     if (!OptPassPair.first)
283       continue;
284 
285     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
286     std::string PassIdName =
287         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
288 
289     if (opts::Verbosity > 0)
290       BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
291 
292     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
293                        TimerGroupDesc, TimeOpts);
294 
295     Error E = Error::success();
296     callWithDynoStats(
297         BC.outs(),
298         [this, &E, &Pass] {
299           E = joinErrors(std::move(E), Pass->runOnFunctions(BC));
300         },
301         BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
302     if (E)
303       return Error(std::move(E));
304 
305     if (opts::VerifyCFG &&
306         !std::accumulate(
307             BFs.begin(), BFs.end(), true,
308             [](const bool Valid,
309                const std::pair<const uint64_t, BinaryFunction> &It) {
310               return Valid && It.second.validateCFG();
311             })) {
312       return createFatalBOLTError(
313           Twine("BOLT-ERROR: Invalid CFG detected after pass ") +
314           Twine(Pass->getName()) + Twine("\n"));
315     }
316 
317     if (opts::Verbosity > 0)
318       BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
319 
320     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
321       continue;
322 
323     const std::string Message = std::string("after ") + Pass->getName();
324 
325     for (auto &It : BFs) {
326       BinaryFunction &Function = It.second;
327 
328       if (!Pass->shouldPrint(Function))
329         continue;
330 
331       Function.print(BC.outs(), Message);
332 
333       if (opts::DumpDotAll)
334         Function.dumpGraphForPass(PassIdName);
335     }
336   }
337   return Error::success();
338 }
339 
340 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
341   BinaryFunctionPassManager Manager(BC);
342 
343   Manager.registerPass(
344       std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));
345 
346   const DynoStats InitialDynoStats =
347       getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
348 
349   Manager.registerPass(std::make_unique<AsmDumpPass>(),
350                        opts::AsmDump.getNumOccurrences());
351 
352   if (BC.isAArch64()) {
353     Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));
354 
355     Manager.registerPass(
356         std::make_unique<VeneerElimination>(PrintVeneerElimination));
357   }
358 
359   if (BC.isRISCV()) {
360     Manager.registerPass(
361         std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls));
362   }
363 
364   // Here we manage dependencies/order manually, since passes are run in the
365   // order they're registered.
366 
367   // Run this pass first to use stats for the original functions.
368   Manager.registerPass(std::make_unique<PrintProgramStats>());
369 
370   if (opts::PrintProfileStats)
371     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
372 
373   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
374 
375   Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
376 
377   if (opts::Instrument)
378     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
379   else if (opts::Hugify)
380     Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
381 
382   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
383 
384   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
385                        !opts::KeepNops);
386 
387   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
388 
389   if (BC.isX86())
390     Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
391                          opts::StripRepRet);
392 
393   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
394                        opts::ICF);
395 
396   Manager.registerPass(
397       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
398       !opts::SpecializeMemcpy1.empty());
399 
400   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
401                        opts::StringOps);
402 
403   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
404 
405   Manager.registerPass(
406       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
407       opts::JTFootprintReductionFlag);
408 
409   Manager.registerPass(
410       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
411       opts::SimplifyRODataLoads);
412 
413   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
414                        opts::RegReAssign);
415 
416   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
417 
418   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
419                        opts::ICF);
420 
421   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
422 
423   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
424                        opts::ThreeWayBranchFlag);
425 
426   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
427 
428   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
429                        opts::EliminateUnreachable);
430 
431   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
432 
433   Manager.registerPass(std::make_unique<LoopInversionPass>());
434 
435   Manager.registerPass(std::make_unique<TailDuplication>());
436 
437   Manager.registerPass(std::make_unique<CMOVConversion>(),
438                        opts::CMOVConversionFlag);
439 
440   // This pass syncs local branches with CFG. If any of the following
441   // passes breaks the sync - they either need to re-run the pass or
442   // fix branches consistency internally.
443   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
444 
445   // This pass should come close to last since it uses the estimated hot
446   // size of a function to determine the order.  It should definitely
447   // also happen after any changes to the call graph are made, e.g. inlining.
448   Manager.registerPass(
449       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
450 
451   // This is the second run of the SplitFunctions pass required by certain
452   // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
453   // after ReorderFunctions allows the finalized function order to be utilized
454   // to make more sophisticated splitting decisions, like hot-warm-cold
455   // splitting.
456   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
457 
458   // Print final dyno stats right while CFG and instruction analysis are intact.
459   Manager.registerPass(
460       std::make_unique<DynoStatsPrintPass>(
461           InitialDynoStats, "after all optimizations before SCTC and FOP"),
462       opts::PrintDynoStats || opts::DynoStatsAll);
463 
464   // Add the StokeInfo pass, which extract functions for stoke optimization and
465   // get the liveness information for them
466   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
467 
468   // This pass introduces conditional jumps into external functions.
469   // Between extending CFG to support this and isolating this pass we chose
470   // the latter. Thus this pass will do double jump removal and unreachable
471   // code elimination if necessary and won't rely on peepholes/UCE for these
472   // optimizations.
473   // More generally this pass should be the last optimization pass that
474   // modifies branches/control flow.  This pass is run after function
475   // reordering so that it can tell whether calls are forward/backward
476   // accurately.
477   Manager.registerPass(
478       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
479       opts::SimplifyConditionalTailCalls);
480 
481   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
482 
483   Manager.registerPass(std::make_unique<AlignerPass>());
484 
485   // Perform reordering on data contained in one or more sections using
486   // memory profiling data.
487   Manager.registerPass(std::make_unique<ReorderData>());
488 
489   if (BC.isAArch64()) {
490     Manager.registerPass(std::make_unique<ADRRelaxationPass>());
491 
492     // Tighten branches according to offset differences between branch and
493     // targets. No extra instructions after this pass, otherwise we may have
494     // relocations out of range and crash during linking.
495     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
496   }
497 
498   // This pass should always run last.*
499   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
500 
501   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
502   // FrameOptimizer move values around and needs to update CFIs. To do this, it
503   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
504   // placed according to the final layout.
505   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
506 
507   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
508 
509   Manager.registerPass(
510       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
511 
512   // Assign each function an output section.
513   Manager.registerPass(std::make_unique<AssignSections>());
514 
515   // Patch original function entries
516   if (BC.HasRelocations)
517     Manager.registerPass(std::make_unique<PatchEntries>());
518 
519   // This pass turns tail calls into jumps which makes them invisible to
520   // function reordering. It's unsafe to use any CFG or instruction analysis
521   // after this point.
522   Manager.registerPass(
523       std::make_unique<InstructionLowering>(PrintAfterLowering));
524 
525   // In non-relocation mode, mark functions that do not fit into their original
526   // space as non-simple if we have to (e.g. for correct debug info update).
527   // NOTE: this pass depends on finalized code.
528   if (!BC.HasRelocations)
529     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
530 
531   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
532 
533   // Check for dirty state of MCSymbols caused by running calculateEmittedSize
534   // in parallel and restore them
535   Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint));
536 
537   return Manager.runPasses();
538 }
539 
540 } // namespace bolt
541 } // namespace llvm
542