xref: /llvm-project/bolt/include/bolt/Passes/TailDuplication.h (revision a5f3d1a803020167bd9d494a8a3921e7dcc1550a)
1 //===- bolt/Passes/TailDuplication.h ----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass founds cases when BBs have layout:
10 // #BB0:
11 // <body>
12 // jmp #BB2
13 // ....
14 // #BB1
15 // <body>
16 // #BB2:
17 // <body>
18 //
19 // And duplicates #BB2 and puts it after #BB0:
20 // #BB0:
21 // <body>
22 // #BB2:
23 // <body>
24 // ....
25 // #BB1
26 // <body>
27 // #BB2:
28 // <body>
29 //
30 // The advantage is getting rid of an unconditional branch and hopefully to
31 // improve i-cache performance by reducing fragmentation The disadvantage is
32 // that if there is too much code duplication, we may end up evicting hot cache
33 // lines and causing the opposite effect, hurting i-cache performance This needs
34 // to be well balanced to achieve the optimal effect
35 //
36 //===----------------------------------------------------------------------===//
37 
38 #ifndef BOLT_PASSES_TAILDUPLICATION_H
39 #define BOLT_PASSES_TAILDUPLICATION_H
40 
41 #include "bolt/Passes/BinaryPasses.h"
42 
43 namespace llvm {
44 namespace bolt {
45 
46 /// Pass for duplicating blocks that would require a jump.
47 class TailDuplication : public BinaryFunctionPass {
48   /// Record how many possible tail duplications there can be.
49   uint64_t ModifiedFunctions = 0;
50 
51   /// The number of duplicated basic blocks.
52   uint64_t DuplicatedBlockCount = 0;
53 
54   /// The size (in bytes) of duplicated basic blocks.
55   uint64_t DuplicatedByteCount = 0;
56 
57   /// Record how many times these duplications would get used.
58   uint64_t DuplicationsDynamicCount = 0;
59 
60   /// Record the execution count of all blocks.
61   uint64_t AllDynamicCount = 0;
62 
63   /// Record the number of instructions deleted because of propagation
64   uint64_t StaticInstructionDeletionCount = 0;
65 
66   /// Record the number of instructions deleted because of propagation
67   uint64_t DynamicInstructionDeletionCount = 0;
68 
69   /// Sets Regs with the caller saved registers
70   void getCallerSavedRegs(const MCInst &Inst, BitVector &Regs,
71                           BinaryContext &BC) const;
72 
73   /// Returns true if Reg is possibly overwritten by Inst
74   bool regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg,
75                                 BinaryContext &BC) const;
76 
77   /// Returns true if Reg is definitely overwritten by Inst
78   bool regIsDefinitelyOverwritten(const MCInst &Inst, unsigned Reg,
79                                   BinaryContext &BC) const;
80 
81   /// Returns true if Reg is used by Inst
82   bool regIsUsed(const MCInst &Inst, unsigned Reg, BinaryContext &BC) const;
83 
84   /// Returns true if Reg is overwritten before its used by StartBB's successors
85   bool isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB, unsigned Reg) const;
86 
87   /// Constant and Copy Propagate for the block formed by OriginalBB and
88   /// BlocksToPropagate
89   void
90   constantAndCopyPropagate(BinaryBasicBlock &OriginalBB,
91                            std::vector<BinaryBasicBlock *> &BlocksToPropagate);
92 
93   /// True if Tail is in the same cache line as BB (approximately)
94   bool isInCacheLine(const BinaryBasicBlock &BB,
95                      const BinaryBasicBlock &Tail) const;
96 
97   /// Duplicates BlocksToDuplicate and places them after BB.
98   std::vector<BinaryBasicBlock *> duplicateBlocks(
99       BinaryBasicBlock &BB,
100       const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const;
101 
102   /// Decide whether the tail basic blocks should be duplicated after BB.
103   bool shouldDuplicate(BinaryBasicBlock *BB, BinaryBasicBlock *Tail) const;
104 
105   /// Compute the cache score for a jump (Src, Dst) with frequency Count.
106   /// The value is in the range [0..1] and quantifies how "cache-friendly"
107   /// the jump is. The score is close to 1 for "short" forward jumps and
108   /// it is 0 for "long" jumps exceeding a specified threshold; between the
109   /// bounds, the value decreases linearly. For backward jumps, the value is
110   /// scaled by a specified factor.
111   double cacheScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
112                     uint64_t DstSize, uint64_t Count) const;
113 
114   /// Decide whether the cache score has been improved after duplication.
115   bool cacheScoreImproved(const MCCodeEmitter *Emitter, BinaryFunction &BF,
116                           BinaryBasicBlock *Pred, BinaryBasicBlock *Tail) const;
117 
118   /// A moderate strategy for tail duplication.
119   /// Returns a vector of BinaryBasicBlock to copy after BB. If it's empty,
120   /// nothing should be duplicated.
121   std::vector<BinaryBasicBlock *>
122   moderateDuplicate(BinaryBasicBlock &BB, BinaryBasicBlock &Tail) const;
123 
124   /// An aggressive strategy for tail duplication.
125   std::vector<BinaryBasicBlock *>
126   aggressiveDuplicate(BinaryBasicBlock &BB, BinaryBasicBlock &Tail) const;
127 
128   /// A cache-aware strategy for tail duplication.
129   std::vector<BinaryBasicBlock *> cacheDuplicate(const MCCodeEmitter *Emitter,
130                                                  BinaryFunction &BF,
131                                                  BinaryBasicBlock *BB,
132                                                  BinaryBasicBlock *Tail) const;
133 
134   void runOnFunction(BinaryFunction &Function);
135 
136 public:
137   enum DuplicationMode : char {
138     TD_NONE = 0,
139     TD_AGGRESSIVE,
140     TD_MODERATE,
141     TD_CACHE
142   };
143 
TailDuplication()144   explicit TailDuplication() : BinaryFunctionPass(false) {}
145 
getName()146   const char *getName() const override { return "tail duplication"; }
147 
148   Error runOnFunctions(BinaryContext &BC) override;
149 };
150 
151 } // namespace bolt
152 } // namespace llvm
153 
154 #endif
155