xref: /llvm-project/bolt/include/bolt/Passes/BinaryPasses.h (revision a38f0157f2a9efcae13b691c63723426e8adc0ee)
1 //===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The set of optimization/analysis passes that run on BinaryFunctions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef BOLT_PASSES_BINARY_PASSES_H
14 #define BOLT_PASSES_BINARY_PASSES_H
15 
16 #include "bolt/Core/BinaryContext.h"
17 #include "bolt/Core/BinaryFunction.h"
18 #include "bolt/Core/DynoStats.h"
19 #include "bolt/Profile/BoltAddressTranslation.h"
20 #include "llvm/Support/CommandLine.h"
21 #include <atomic>
22 #include <set>
23 #include <string>
24 #include <unordered_set>
25 
26 namespace llvm {
27 namespace bolt {
28 
29 /// An optimization/analysis pass that runs on functions.
30 class BinaryFunctionPass {
31 protected:
32   bool PrintPass;
33 
BinaryFunctionPass(const bool PrintPass)34   explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {}
35 
36   /// Control whether a specific function should be skipped during
37   /// optimization.
38   virtual bool shouldOptimize(const BinaryFunction &BF) const;
39 
40 public:
41   virtual ~BinaryFunctionPass() = default;
42 
43   /// The name of this pass
44   virtual const char *getName() const = 0;
45 
46   /// Control whether debug info is printed after this pass is completed.
printPass()47   bool printPass() const { return PrintPass; }
48 
49   /// Control whether debug info is printed for an individual function after
50   /// this pass is completed (printPass() must have returned true).
51   virtual bool shouldPrint(const BinaryFunction &BF) const;
52 
53   virtual Error runOnFunctions(BinaryContext &BC) = 0;
54 };
55 
56 /// A pass to set initial program-wide dynostats.
57 class DynoStatsSetPass : public BinaryFunctionPass {
58 public:
DynoStatsSetPass()59   DynoStatsSetPass() : BinaryFunctionPass(false) {}
60 
getName()61   const char *getName() const override {
62     return "set dyno-stats before optimizations";
63   }
64 
shouldPrint(const BinaryFunction & BF)65   bool shouldPrint(const BinaryFunction &BF) const override { return false; }
66 
runOnFunctions(BinaryContext & BC)67   Error runOnFunctions(BinaryContext &BC) override {
68     BC.InitialDynoStats = getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
69     return Error::success();
70   }
71 };
72 
73 /// A pass to print program-wide dynostats.
74 class DynoStatsPrintPass : public BinaryFunctionPass {
75 protected:
76   std::string Title;
77 
78 public:
DynoStatsPrintPass(const char * Title)79   DynoStatsPrintPass(const char *Title)
80       : BinaryFunctionPass(false), Title(Title) {}
81 
getName()82   const char *getName() const override {
83     return "print dyno-stats after optimizations";
84   }
85 
shouldPrint(const BinaryFunction & BF)86   bool shouldPrint(const BinaryFunction &BF) const override { return false; }
87 
runOnFunctions(BinaryContext & BC)88   Error runOnFunctions(BinaryContext &BC) override {
89     const DynoStats PrevDynoStats = BC.InitialDynoStats;
90     const DynoStats NewDynoStats =
91         getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
92     const bool Changed = (NewDynoStats != PrevDynoStats);
93     BC.outs() << "BOLT-INFO: program-wide dynostats " << Title
94               << (Changed ? "" : " (no change)") << ":\n\n"
95               << PrevDynoStats;
96     if (Changed) {
97       BC.outs() << '\n';
98       NewDynoStats.print(BC.outs(), &PrevDynoStats, BC.InstPrinter.get());
99     }
100     BC.outs() << '\n';
101     return Error::success();
102   }
103 };
104 
105 /// The pass normalizes CFG by performing the following transformations:
106 ///   * removes empty basic blocks
107 ///   * merges duplicate edges and updates jump instructions
108 class NormalizeCFG : public BinaryFunctionPass {
109   std::atomic<uint64_t> NumBlocksRemoved{0};
110   std::atomic<uint64_t> NumDuplicateEdgesMerged{0};
111 
112   void runOnFunction(BinaryFunction &BF);
113 
114 public:
NormalizeCFG(const cl::opt<bool> & PrintPass)115   NormalizeCFG(const cl::opt<bool> &PrintPass)
116       : BinaryFunctionPass(PrintPass) {}
117 
getName()118   const char *getName() const override { return "normalize CFG"; }
119 
120   Error runOnFunctions(BinaryContext &) override;
121 };
122 
123 /// Detect and eliminate unreachable basic blocks. We could have those
124 /// filled with nops and they are used for alignment.
125 class EliminateUnreachableBlocks : public BinaryFunctionPass {
126   std::unordered_set<const BinaryFunction *> Modified;
127   std::atomic<unsigned> DeletedBlocks{0};
128   std::atomic<uint64_t> DeletedBytes{0};
129   void runOnFunction(BinaryFunction &Function);
130 
131 public:
EliminateUnreachableBlocks(const cl::opt<bool> & PrintPass)132   EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass)
133       : BinaryFunctionPass(PrintPass) {}
134 
getName()135   const char *getName() const override { return "eliminate-unreachable"; }
shouldPrint(const BinaryFunction & BF)136   bool shouldPrint(const BinaryFunction &BF) const override {
137     return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
138   }
139   Error runOnFunctions(BinaryContext &) override;
140 };
141 
142 // Reorder the basic blocks for each function based on hotness.
143 class ReorderBasicBlocks : public BinaryFunctionPass {
144 public:
145   /// Choose which strategy should the block layout heuristic prioritize when
146   /// facing conflicting goals.
147   enum LayoutType : char {
148     /// LT_NONE - do not change layout of basic blocks
149     LT_NONE = 0, /// no reordering
150     /// LT_REVERSE - reverse the order of basic blocks, meant for testing
151     /// purposes. The first basic block is left intact and the rest are
152     /// put in the reverse order.
153     LT_REVERSE,
154     /// LT_OPTIMIZE - optimize layout of basic blocks based on profile.
155     LT_OPTIMIZE,
156     /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis'
157     /// paper (PLDI '90) about block reordering, trying to minimize branch
158     /// mispredictions.
159     LT_OPTIMIZE_BRANCH,
160     /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04)
161     /// that suggests putting frequently executed chains first in the layout.
162     LT_OPTIMIZE_CACHE,
163     // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation.
164     LT_OPTIMIZE_CACHE_PLUS,
165     /// Block reordering guided by the extended TSP metric.
166     LT_OPTIMIZE_EXT_TSP,
167     /// Create clusters and use random order for them.
168     LT_OPTIMIZE_SHUFFLE,
169   };
170 
171 private:
172   /// Run the specified layout algorithm on the given function. Returns `true`
173   /// if the order of blocks was changed.
174   bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type,
175                             bool MinBranchClusters) const;
176 
177 public:
ReorderBasicBlocks(const cl::opt<bool> & PrintPass)178   explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass)
179       : BinaryFunctionPass(PrintPass) {}
180 
181   bool shouldOptimize(const BinaryFunction &BF) const override;
182 
getName()183   const char *getName() const override { return "reorder-blocks"; }
184   bool shouldPrint(const BinaryFunction &BF) const override;
185   Error runOnFunctions(BinaryContext &BC) override;
186 };
187 
188 /// Sync local branches with CFG.
189 class FixupBranches : public BinaryFunctionPass {
190 public:
FixupBranches(const cl::opt<bool> & PrintPass)191   explicit FixupBranches(const cl::opt<bool> &PrintPass)
192       : BinaryFunctionPass(PrintPass) {}
193 
getName()194   const char *getName() const override { return "fix-branches"; }
195   Error runOnFunctions(BinaryContext &BC) override;
196 };
197 
198 /// Fix the CFI state and exception handling information after all other
199 /// passes have completed.
200 class FinalizeFunctions : public BinaryFunctionPass {
201 public:
FinalizeFunctions(const cl::opt<bool> & PrintPass)202   explicit FinalizeFunctions(const cl::opt<bool> &PrintPass)
203       : BinaryFunctionPass(PrintPass) {}
204 
getName()205   const char *getName() const override { return "finalize-functions"; }
206   Error runOnFunctions(BinaryContext &BC) override;
207 };
208 
209 /// Perform any necessary adjustments for functions that do not fit into their
210 /// original space in non-relocation mode.
211 class CheckLargeFunctions : public BinaryFunctionPass {
212 public:
CheckLargeFunctions(const cl::opt<bool> & PrintPass)213   explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass)
214       : BinaryFunctionPass(PrintPass) {}
215 
getName()216   const char *getName() const override { return "check-large-functions"; }
217 
218   Error runOnFunctions(BinaryContext &BC) override;
219 
220   bool shouldOptimize(const BinaryFunction &BF) const override;
221 };
222 
223 /// Convert and remove all BOLT-related annotations before LLVM code emission.
224 class LowerAnnotations : public BinaryFunctionPass {
225 public:
LowerAnnotations(const cl::opt<bool> & PrintPass)226   explicit LowerAnnotations(const cl::opt<bool> &PrintPass)
227       : BinaryFunctionPass(PrintPass) {}
228 
getName()229   const char *getName() const override { return "lower-annotations"; }
230   Error runOnFunctions(BinaryContext &BC) override;
231 };
232 
233 /// Clean the state of the MC representation before sending it to emission
234 class CleanMCState : public BinaryFunctionPass {
235 public:
CleanMCState(const cl::opt<bool> & PrintPass)236   explicit CleanMCState(const cl::opt<bool> &PrintPass)
237       : BinaryFunctionPass(PrintPass) {}
238 
getName()239   const char *getName() const override { return "clean-mc-state"; }
240   Error runOnFunctions(BinaryContext &BC) override;
241 };
242 
243 /// An optimization to simplify conditional tail calls by removing
244 /// unnecessary branches.
245 ///
246 /// This optimization considers both of the following cases:
247 ///
248 /// foo: ...
249 ///      jcc L1   original
250 ///      ...
251 /// L1:  jmp bar  # TAILJMP
252 ///
253 /// ->
254 ///
255 /// foo: ...
256 ///      jcc bar  iff jcc L1 is expected
257 ///      ...
258 ///
259 /// L1 is unreachable
260 ///
261 /// OR
262 ///
263 /// foo: ...
264 ///      jcc  L2
265 /// L1:  jmp  dest  # TAILJMP
266 /// L2:  ...
267 ///
268 /// ->
269 ///
270 /// foo: jncc dest  # TAILJMP
271 /// L2:  ...
272 ///
273 /// L1 is unreachable
274 ///
275 /// For this particular case, the first basic block ends with
276 /// a conditional branch and has two successors, one fall-through
277 /// and one for when the condition is true.
278 /// The target of the conditional is a basic block with a single
279 /// unconditional branch (i.e. tail call) to another function.
280 /// We don't care about the contents of the fall-through block.
281 /// We assume that the target of the conditional branch is the
282 /// first successor.
283 class SimplifyConditionalTailCalls : public BinaryFunctionPass {
284   uint64_t NumCandidateTailCalls{0};
285   uint64_t NumTailCallsPatched{0};
286   uint64_t CTCExecCount{0};
287   uint64_t CTCTakenCount{0};
288   uint64_t NumOrigForwardBranches{0};
289   uint64_t NumOrigBackwardBranches{0};
290   uint64_t NumDoubleJumps{0};
291   uint64_t DeletedBlocks{0};
292   uint64_t DeletedBytes{0};
293   std::unordered_set<const BinaryFunction *> Modified;
294   std::set<const BinaryBasicBlock *> BeenOptimized;
295 
296   bool shouldRewriteBranch(const BinaryBasicBlock *PredBB,
297                            const MCInst &CondBranch, const BinaryBasicBlock *BB,
298                            const bool DirectionFlag);
299 
300   uint64_t fixTailCalls(BinaryFunction &BF);
301 
302 public:
SimplifyConditionalTailCalls(const cl::opt<bool> & PrintPass)303   explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass)
304       : BinaryFunctionPass(PrintPass) {}
305 
getName()306   const char *getName() const override {
307     return "simplify-conditional-tail-calls";
308   }
shouldPrint(const BinaryFunction & BF)309   bool shouldPrint(const BinaryFunction &BF) const override {
310     return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
311   }
312   Error runOnFunctions(BinaryContext &BC) override;
313 };
314 
315 /// Convert instructions to the form with the minimum operand width.
316 class ShortenInstructions : public BinaryFunctionPass {
317   uint64_t shortenInstructions(BinaryFunction &Function);
318 
319 public:
ShortenInstructions(const cl::opt<bool> & PrintPass)320   explicit ShortenInstructions(const cl::opt<bool> &PrintPass)
321       : BinaryFunctionPass(PrintPass) {}
322 
getName()323   const char *getName() const override { return "shorten-instructions"; }
324 
325   Error runOnFunctions(BinaryContext &BC) override;
326 };
327 
328 /// Perform simple peephole optimizations.
329 class Peepholes : public BinaryFunctionPass {
330 public:
331   enum PeepholeOpts : char {
332     PEEP_NONE = 0x0,
333     PEEP_DOUBLE_JUMPS = 0x2,
334     PEEP_TAILCALL_TRAPS = 0x4,
335     PEEP_USELESS_BRANCHES = 0x8,
336     PEEP_ALL = 0xf
337   };
338 
339 private:
340   uint64_t NumDoubleJumps{0};
341   uint64_t TailCallTraps{0};
342   uint64_t NumUselessCondBranches{0};
343 
344   /// Add trap instructions immediately after indirect tail calls to prevent
345   /// the processor from decoding instructions immediate following the
346   /// tailcall.
347   void addTailcallTraps(BinaryFunction &Function);
348 
349   /// Remove useless duplicate successors.  When the conditional
350   /// successor is the same as the unconditional successor, we can
351   /// remove the conditional successor and branch instruction.
352   void removeUselessCondBranches(BinaryFunction &Function);
353 
354 public:
Peepholes(const cl::opt<bool> & PrintPass)355   explicit Peepholes(const cl::opt<bool> &PrintPass)
356       : BinaryFunctionPass(PrintPass) {}
357 
getName()358   const char *getName() const override { return "peepholes"; }
359   Error runOnFunctions(BinaryContext &BC) override;
360 };
361 
362 /// An optimization to simplify loads from read-only sections.The pass converts
363 /// load instructions with statically computed target address such as:
364 ///
365 ///      mov 0x12f(%rip), %eax
366 ///
367 /// to their counterparts that use immediate operands instead of memory loads:
368 ///
369 ///     mov $0x4007dc, %eax
370 ///
371 /// when the target address points somewhere inside a read-only section.
372 ///
373 class SimplifyRODataLoads : public BinaryFunctionPass {
374   uint64_t NumLoadsSimplified{0};
375   uint64_t NumDynamicLoadsSimplified{0};
376   uint64_t NumLoadsFound{0};
377   uint64_t NumDynamicLoadsFound{0};
378   std::unordered_set<const BinaryFunction *> Modified;
379 
380   bool simplifyRODataLoads(BinaryFunction &BF);
381 
382 public:
SimplifyRODataLoads(const cl::opt<bool> & PrintPass)383   explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass)
384       : BinaryFunctionPass(PrintPass) {}
385 
getName()386   const char *getName() const override { return "simplify-read-only-loads"; }
shouldPrint(const BinaryFunction & BF)387   bool shouldPrint(const BinaryFunction &BF) const override {
388     return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
389   }
390   Error runOnFunctions(BinaryContext &BC) override;
391 };
392 
393 /// Assign output sections to all functions.
394 class AssignSections : public BinaryFunctionPass {
395 public:
AssignSections()396   explicit AssignSections() : BinaryFunctionPass(false) {}
397 
getName()398   const char *getName() const override { return "assign-sections"; }
399   Error runOnFunctions(BinaryContext &BC) override;
400 };
401 
402 /// Compute and report to the user the imbalance in flow equations for all
403 /// CFGs, so we can detect bad quality profile. Prints average and standard
404 /// deviation of the absolute differences of outgoing flow minus incoming flow
405 /// for blocks of interest (excluding prologues, epilogues, and BB frequency
406 /// lower than 100).
407 class PrintProfileStats : public BinaryFunctionPass {
408 public:
PrintProfileStats(const cl::opt<bool> & PrintPass)409   explicit PrintProfileStats(const cl::opt<bool> &PrintPass)
410       : BinaryFunctionPass(PrintPass) {}
411 
getName()412   const char *getName() const override { return "profile-stats"; }
shouldPrint(const BinaryFunction &)413   bool shouldPrint(const BinaryFunction &) const override { return false; }
414   Error runOnFunctions(BinaryContext &BC) override;
415 };
416 
417 /// Prints a list of the top 100 functions sorted by a set of
418 /// dyno stats categories.
419 class PrintProgramStats : public BinaryFunctionPass {
420   BoltAddressTranslation *BAT = nullptr;
421 
422 public:
423   explicit PrintProgramStats(BoltAddressTranslation *BAT = nullptr)
BinaryFunctionPass(false)424       : BinaryFunctionPass(false), BAT(BAT) {}
425 
getName()426   const char *getName() const override { return "print-stats"; }
shouldPrint(const BinaryFunction &)427   bool shouldPrint(const BinaryFunction &) const override { return false; }
428   Error runOnFunctions(BinaryContext &BC) override;
429 };
430 
431 /// Pass for lowering any instructions that we have raised and that have
432 /// to be lowered.
433 class InstructionLowering : public BinaryFunctionPass {
434 public:
InstructionLowering(const cl::opt<bool> & PrintPass)435   explicit InstructionLowering(const cl::opt<bool> &PrintPass)
436       : BinaryFunctionPass(PrintPass) {}
437 
getName()438   const char *getName() const override { return "inst-lowering"; }
439 
440   Error runOnFunctions(BinaryContext &BC) override;
441 };
442 
443 /// Pass for stripping 'repz' from 'repz retq' sequence of instructions.
444 class StripRepRet : public BinaryFunctionPass {
445 public:
StripRepRet(const cl::opt<bool> & PrintPass)446   explicit StripRepRet(const cl::opt<bool> &PrintPass)
447       : BinaryFunctionPass(PrintPass) {}
448 
getName()449   const char *getName() const override { return "strip-rep-ret"; }
450 
451   Error runOnFunctions(BinaryContext &BC) override;
452 };
453 
454 /// Pass for inlining calls to memcpy using 'rep movsb' on X86.
455 class InlineMemcpy : public BinaryFunctionPass {
456 public:
InlineMemcpy(const cl::opt<bool> & PrintPass)457   explicit InlineMemcpy(const cl::opt<bool> &PrintPass)
458       : BinaryFunctionPass(PrintPass) {}
459 
getName()460   const char *getName() const override { return "inline-memcpy"; }
461 
462   Error runOnFunctions(BinaryContext &BC) override;
463 };
464 
465 /// Pass for specializing memcpy for a size of 1 byte.
466 class SpecializeMemcpy1 : public BinaryFunctionPass {
467 private:
468   std::vector<std::string> Spec;
469 
470   /// Return indices of the call sites to optimize. Count starts at 1.
471   /// Returns an empty set for all call sites in the function.
472   std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const;
473 
474 public:
SpecializeMemcpy1(const cl::opt<bool> & PrintPass,cl::list<std::string> & Spec)475   explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass,
476                              cl::list<std::string> &Spec)
477       : BinaryFunctionPass(PrintPass), Spec(Spec) {}
478 
479   bool shouldOptimize(const BinaryFunction &BF) const override;
480 
getName()481   const char *getName() const override { return "specialize-memcpy"; }
482 
483   Error runOnFunctions(BinaryContext &BC) override;
484 };
485 
486 /// Pass to remove nops in code
487 class RemoveNops : public BinaryFunctionPass {
488   void runOnFunction(BinaryFunction &Function);
489 
490 public:
RemoveNops(const cl::opt<bool> & PrintPass)491   explicit RemoveNops(const cl::opt<bool> &PrintPass)
492       : BinaryFunctionPass(PrintPass) {}
493 
getName()494   const char *getName() const override { return "remove-nops"; }
495 
496   /// Pass entry point
497   Error runOnFunctions(BinaryContext &BC) override;
498 };
499 
500 enum FrameOptimizationType : char {
501   FOP_NONE, /// Don't perform FOP.
502   FOP_HOT,  /// Perform FOP on hot functions.
503   FOP_ALL   /// Perform FOP on all functions.
504 };
505 
506 } // namespace bolt
507 } // namespace llvm
508 
509 #endif
510