1 //===- bolt/Passes/IndirectCallPromotion.h ----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The indirect call promotion (ICP) optimization pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef BOLT_PASSES_INDIRECT_CALL_PROMOTION_H 14 #define BOLT_PASSES_INDIRECT_CALL_PROMOTION_H 15 16 #include "bolt/Passes/BinaryPasses.h" 17 18 namespace llvm { 19 namespace bolt { 20 21 /// Optimize indirect calls. 22 /// The indirect call promotion pass visits each indirect call and 23 /// examines a branch profile for each. If the most frequent targets 24 /// from that callsite exceed the specified threshold (default 90%), 25 /// the call is promoted. Otherwise, it is ignored. By default, 26 /// only one target is considered at each callsite. 27 /// 28 /// When an candidate callsite is processed, we modify the callsite 29 /// to test for the most common call targets before calling through 30 /// the original generic call mechanism. 31 /// 32 /// The CFG and layout are modified by ICP. 33 /// 34 /// A few new command line options have been added: 35 /// -indirect-call-promotion=[none,call,jump-tables,all] 36 /// -indirect-call-promotion-threshold=<percentage> 37 /// -indirect-call-promotion-mispredict-threshold=<percentage> 38 /// -indirect-call-promotion-topn=<int> 39 /// 40 /// The threshold is the minimum frequency of a call target needed 41 /// before ICP is triggered. 42 /// 43 /// The mispredict threshold is used to disable the optimization at 44 /// any callsite where the branch predictor does a good enough job 45 /// that ICP wouldn't help regardless of the frequency of the most 46 /// common target. 47 /// 48 /// The topn option controls the number of targets to consider for 49 /// each callsite, e.g. ICP is triggered if topn=2 and the total 50 /// frequency of the top two call targets exceeds the threshold. 51 /// 52 /// The minimize code size option controls whether or not the hot 53 /// calls are to registers (callq %r10) or to function addresses 54 /// (callq $foo). 55 /// 56 /// Example of ICP: 57 /// 58 /// C++ code: 59 /// 60 /// int B_count = 0; 61 /// int C_count = 0; 62 /// 63 /// struct A { virtual void foo() = 0; } 64 /// struct B : public A { virtual void foo() { ++B_count; }; }; 65 /// struct C : public A { virtual void foo() { ++C_count; }; }; 66 /// 67 /// A* a = ... 68 /// a->foo(); 69 /// ... 70 /// 71 /// original assembly: 72 /// 73 /// B0: 49 8b 07 mov (%r15),%rax 74 /// 4c 89 ff mov %r15,%rdi 75 /// ff 10 callq *(%rax) 76 /// 41 83 e6 01 and $0x1,%r14d 77 /// 4d 89 e6 mov %r12,%r14 78 /// 4c 0f 44 f5 cmove %rbp,%r14 79 /// 4c 89 f7 mov %r14,%rdi 80 /// ... 81 /// 82 /// after ICP: 83 /// 84 /// B0: 49 8b 07 mov (%r15),%rax 85 /// 4c 89 ff mov %r15,%rdi 86 /// 48 81 38 e0 0b 40 00 cmpq $B::foo,(%rax) 87 /// 75 29 jne B3 88 /// B1: e8 45 03 00 00 callq $B::foo 89 /// B2: 41 83 e6 01 and $0x1,%r14d 90 /// 4d 89 e6 mov %r12,%r14 91 /// 4c 0f 44 f5 cmove %rbp,%r14 92 /// 4c 89 f7 mov %r14,%rdi 93 /// ... 94 /// 95 /// B3: ff 10 callq *(%rax) 96 /// eb d6 jmp B2 97 /// 98 class IndirectCallPromotion : public BinaryFunctionPass { 99 using BasicBlocksVector = std::vector<std::unique_ptr<BinaryBasicBlock>>; 100 using MethodInfoType = std::pair<std::vector<std::pair<MCSymbol *, uint64_t>>, 101 std::vector<MCInst *>>; 102 using JumpTableInfoType = std::vector<std::pair<uint64_t, uint64_t>>; 103 using SymTargetsType = std::vector<std::pair<MCSymbol *, uint64_t>>; 104 struct Location { 105 MCSymbol *Sym{nullptr}; 106 uint64_t Addr{0}; isValidLocation107 bool isValid() const { return Sym || Addr != 0; } LocationLocation108 Location() {} LocationLocation109 explicit Location(MCSymbol *Sym) : Sym(Sym) {} LocationLocation110 explicit Location(uint64_t Addr) : Addr(Addr) {} 111 }; 112 113 struct Callsite { 114 Location From; 115 Location To; 116 uint64_t Mispreds{0}; 117 uint64_t Branches{0}; 118 // Indices in the jmp table (jt only) 119 std::vector<uint64_t> JTIndices; isValidCallsite120 bool isValid() const { return From.isValid() && To.isValid(); } 121 Callsite(BinaryFunction &BF, const IndirectCallProfile &ICP); CallsiteCallsite122 Callsite(const Location &From, const Location &To, uint64_t Mispreds, 123 uint64_t Branches, uint64_t JTIndex) 124 : From(From), To(To), Mispreds(Mispreds), Branches(Branches), 125 JTIndices(1, JTIndex) {} 126 }; 127 128 std::unordered_set<const BinaryFunction *> Modified; 129 // Total number of calls from all callsites. 130 uint64_t TotalCalls{0}; 131 132 // Total number of indirect calls from all callsites. 133 // (a fraction of TotalCalls) 134 uint64_t TotalIndirectCalls{0}; 135 136 // Total number of jmp table calls from all callsites. 137 // (a fraction of TotalCalls) 138 uint64_t TotalIndirectJmps{0}; 139 140 // Total number of callsites that use indirect calls. 141 // (the total number of callsites is not recorded) 142 uint64_t TotalIndirectCallsites{0}; 143 144 // Total number of callsites that are jump tables. 145 uint64_t TotalJumpTableCallsites{0}; 146 147 // Total number of indirect callsites that are optimized by ICP. 148 // (a fraction of TotalIndirectCallsites) 149 uint64_t TotalOptimizedIndirectCallsites{0}; 150 151 // Total number of method callsites that can have loads eliminated. 152 mutable uint64_t TotalMethodLoadEliminationCandidates{0}; 153 154 // Total number of method callsites that had loads eliminated. 155 uint64_t TotalMethodLoadsEliminated{0}; 156 157 // Total number of jump table callsites that are optimized by ICP. 158 uint64_t TotalOptimizedJumpTableCallsites{0}; 159 160 // Total number of indirect calls that are optimized by ICP. 161 // (a fraction of TotalCalls) 162 uint64_t TotalNumFrequentCalls{0}; 163 164 // Total number of jump table calls that are optimized by ICP. 165 // (a fraction of TotalCalls) 166 uint64_t TotalNumFrequentJmps{0}; 167 168 // Total number of jump table sites that can use hot indices. 169 mutable uint64_t TotalIndexBasedCandidates{0}; 170 171 // Total number of jump table sites that use hot indices. 172 uint64_t TotalIndexBasedJumps{0}; 173 174 void printDecision(llvm::raw_ostream &OS, 175 std::vector<IndirectCallPromotion::Callsite> &Targets, 176 unsigned N) const; 177 178 std::vector<Callsite> getCallTargets(BinaryBasicBlock &BB, 179 const MCInst &Inst) const; 180 181 size_t canPromoteCallsite(const BinaryBasicBlock &BB, const MCInst &Inst, 182 const std::vector<Callsite> &Targets, 183 uint64_t NumCalls); 184 185 void printCallsiteInfo(const BinaryBasicBlock &BB, const MCInst &Inst, 186 const std::vector<Callsite> &Targets, const size_t N, 187 uint64_t NumCalls) const; 188 189 JumpTableInfoType maybeGetHotJumpTableTargets(BinaryBasicBlock &BB, 190 MCInst &Inst, 191 MCInst *&TargetFetchInst, 192 const JumpTable *JT) const; 193 194 SymTargetsType findCallTargetSymbols(std::vector<Callsite> &Targets, 195 size_t &N, BinaryBasicBlock &BB, 196 MCInst &Inst, 197 MCInst *&TargetFetchInst) const; 198 199 MethodInfoType maybeGetVtableSyms(BinaryBasicBlock &BB, MCInst &Inst, 200 const SymTargetsType &SymTargets) const; 201 202 std::vector<std::unique_ptr<BinaryBasicBlock>> 203 rewriteCall(BinaryBasicBlock &IndCallBlock, const MCInst &CallInst, 204 MCPlusBuilder::BlocksVectorTy &&ICPcode, 205 const std::vector<MCInst *> &MethodFetchInsns) const; 206 207 BinaryBasicBlock *fixCFG(BinaryBasicBlock &IndCallBlock, 208 const bool IsTailCall, const bool IsJumpTable, 209 BasicBlocksVector &&NewBBs, 210 const std::vector<Callsite> &Targets) const; 211 212 public: IndirectCallPromotion(const cl::opt<bool> & PrintPass)213 explicit IndirectCallPromotion(const cl::opt<bool> &PrintPass) 214 : BinaryFunctionPass(PrintPass) {} 215 getName()216 const char *getName() const override { return "indirect-call-promotion"; } shouldPrint(const BinaryFunction & BF)217 bool shouldPrint(const BinaryFunction &BF) const override { 218 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; 219 } shouldOptimize(const BinaryFunction & BF)220 bool shouldOptimize(const BinaryFunction &BF) const override { 221 return BF.isSimple() && !BF.isIgnored() && BF.hasProfile() && 222 !BF.hasUnknownControlFlow(); 223 } 224 Error runOnFunctions(BinaryContext &BC) override; 225 }; 226 227 } // namespace bolt 228 } // namespace llvm 229 230 #endif 231