1 //===- bolt/Passes/TailDuplication.h ----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass founds cases when BBs have layout: 10 // #BB0: 11 // <body> 12 // jmp #BB2 13 // .... 14 // #BB1 15 // <body> 16 // #BB2: 17 // <body> 18 // 19 // And duplicates #BB2 and puts it after #BB0: 20 // #BB0: 21 // <body> 22 // #BB2: 23 // <body> 24 // .... 25 // #BB1 26 // <body> 27 // #BB2: 28 // <body> 29 // 30 // The advantage is getting rid of an unconditional branch and hopefully to 31 // improve i-cache performance by reducing fragmentation The disadvantage is 32 // that if there is too much code duplication, we may end up evicting hot cache 33 // lines and causing the opposite effect, hurting i-cache performance This needs 34 // to be well balanced to achieve the optimal effect 35 // 36 //===----------------------------------------------------------------------===// 37 38 #ifndef BOLT_PASSES_TAILDUPLICATION_H 39 #define BOLT_PASSES_TAILDUPLICATION_H 40 41 #include "bolt/Passes/BinaryPasses.h" 42 43 namespace llvm { 44 namespace bolt { 45 46 /// Pass for duplicating blocks that would require a jump. 47 class TailDuplication : public BinaryFunctionPass { 48 /// Record how many possible tail duplications there can be. 49 uint64_t ModifiedFunctions = 0; 50 51 /// The number of duplicated basic blocks. 52 uint64_t DuplicatedBlockCount = 0; 53 54 /// The size (in bytes) of duplicated basic blocks. 55 uint64_t DuplicatedByteCount = 0; 56 57 /// Record how many times these duplications would get used. 58 uint64_t DuplicationsDynamicCount = 0; 59 60 /// Record the execution count of all blocks. 61 uint64_t AllDynamicCount = 0; 62 63 /// Record the number of instructions deleted because of propagation 64 uint64_t StaticInstructionDeletionCount = 0; 65 66 /// Record the number of instructions deleted because of propagation 67 uint64_t DynamicInstructionDeletionCount = 0; 68 69 /// Sets Regs with the caller saved registers 70 void getCallerSavedRegs(const MCInst &Inst, BitVector &Regs, 71 BinaryContext &BC) const; 72 73 /// Returns true if Reg is possibly overwritten by Inst 74 bool regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg, 75 BinaryContext &BC) const; 76 77 /// Returns true if Reg is definitely overwritten by Inst 78 bool regIsDefinitelyOverwritten(const MCInst &Inst, unsigned Reg, 79 BinaryContext &BC) const; 80 81 /// Returns true if Reg is used by Inst 82 bool regIsUsed(const MCInst &Inst, unsigned Reg, BinaryContext &BC) const; 83 84 /// Returns true if Reg is overwritten before its used by StartBB's successors 85 bool isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB, unsigned Reg) const; 86 87 /// Constant and Copy Propagate for the block formed by OriginalBB and 88 /// BlocksToPropagate 89 void 90 constantAndCopyPropagate(BinaryBasicBlock &OriginalBB, 91 std::vector<BinaryBasicBlock *> &BlocksToPropagate); 92 93 /// True if Tail is in the same cache line as BB (approximately) 94 bool isInCacheLine(const BinaryBasicBlock &BB, 95 const BinaryBasicBlock &Tail) const; 96 97 /// Duplicates BlocksToDuplicate and places them after BB. 98 std::vector<BinaryBasicBlock *> duplicateBlocks( 99 BinaryBasicBlock &BB, 100 const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const; 101 102 /// Decide whether the tail basic blocks should be duplicated after BB. 103 bool shouldDuplicate(BinaryBasicBlock *BB, BinaryBasicBlock *Tail) const; 104 105 /// Compute the cache score for a jump (Src, Dst) with frequency Count. 106 /// The value is in the range [0..1] and quantifies how "cache-friendly" 107 /// the jump is. The score is close to 1 for "short" forward jumps and 108 /// it is 0 for "long" jumps exceeding a specified threshold; between the 109 /// bounds, the value decreases linearly. For backward jumps, the value is 110 /// scaled by a specified factor. 111 double cacheScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr, 112 uint64_t DstSize, uint64_t Count) const; 113 114 /// Decide whether the cache score has been improved after duplication. 115 bool cacheScoreImproved(const MCCodeEmitter *Emitter, BinaryFunction &BF, 116 BinaryBasicBlock *Pred, BinaryBasicBlock *Tail) const; 117 118 /// A moderate strategy for tail duplication. 119 /// Returns a vector of BinaryBasicBlock to copy after BB. If it's empty, 120 /// nothing should be duplicated. 121 std::vector<BinaryBasicBlock *> 122 moderateDuplicate(BinaryBasicBlock &BB, BinaryBasicBlock &Tail) const; 123 124 /// An aggressive strategy for tail duplication. 125 std::vector<BinaryBasicBlock *> 126 aggressiveDuplicate(BinaryBasicBlock &BB, BinaryBasicBlock &Tail) const; 127 128 /// A cache-aware strategy for tail duplication. 129 std::vector<BinaryBasicBlock *> cacheDuplicate(const MCCodeEmitter *Emitter, 130 BinaryFunction &BF, 131 BinaryBasicBlock *BB, 132 BinaryBasicBlock *Tail) const; 133 134 void runOnFunction(BinaryFunction &Function); 135 136 public: 137 enum DuplicationMode : char { 138 TD_NONE = 0, 139 TD_AGGRESSIVE, 140 TD_MODERATE, 141 TD_CACHE 142 }; 143 TailDuplication()144 explicit TailDuplication() : BinaryFunctionPass(false) {} 145 getName()146 const char *getName() const override { return "tail duplication"; } 147 148 Error runOnFunctions(BinaryContext &BC) override; 149 }; 150 151 } // namespace bolt 152 } // namespace llvm 153 154 #endif 155