xref: /llvm-project/bolt/include/bolt/Passes/IndirectCallPromotion.h (revision d98e3d43e7943f0440013c9f491f323bcc864aa3)
1 //===- bolt/Passes/IndirectCallPromotion.h ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The indirect call promotion (ICP) optimization pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef BOLT_PASSES_INDIRECT_CALL_PROMOTION_H
14 #define BOLT_PASSES_INDIRECT_CALL_PROMOTION_H
15 
16 #include "bolt/Passes/BinaryPasses.h"
17 
18 namespace llvm {
19 namespace bolt {
20 
21 /// Optimize indirect calls.
22 /// The indirect call promotion pass visits each indirect call and
23 /// examines a branch profile for each. If the most frequent targets
24 /// from that callsite exceed the specified threshold (default 90%),
25 /// the call is promoted. Otherwise, it is ignored. By default,
26 /// only one target is considered at each callsite.
27 ///
28 /// When an candidate callsite is processed, we modify the callsite
29 /// to test for the most common call targets before calling through
30 /// the original generic call mechanism.
31 ///
32 /// The CFG and layout are modified by ICP.
33 ///
34 /// A few new command line options have been added:
35 ///   -indirect-call-promotion=[none,call,jump-tables,all]
36 ///   -indirect-call-promotion-threshold=<percentage>
37 ///   -indirect-call-promotion-mispredict-threshold=<percentage>
38 ///   -indirect-call-promotion-topn=<int>
39 ///
40 /// The threshold is the minimum frequency of a call target needed
41 /// before ICP is triggered.
42 ///
43 /// The mispredict threshold is used to disable the optimization at
44 /// any callsite where the branch predictor does a good enough job
45 /// that ICP wouldn't help regardless of the frequency of the most
46 /// common target.
47 ///
48 /// The topn option controls the number of targets to consider for
49 /// each callsite, e.g. ICP is triggered if topn=2 and the total
50 /// frequency of the top two call targets exceeds the threshold.
51 ///
52 /// The minimize code size option controls whether or not the hot
53 /// calls are to registers (callq %r10) or to function addresses
54 /// (callq $foo).
55 ///
56 /// Example of ICP:
57 ///
58 /// C++ code:
59 ///
60 ///   int B_count = 0;
61 ///   int C_count = 0;
62 ///
63 ///   struct A { virtual void foo() = 0; }
64 ///   struct B : public A { virtual void foo() { ++B_count; }; };
65 ///   struct C : public A { virtual void foo() { ++C_count; }; };
66 ///
67 ///   A* a = ...
68 ///   a->foo();
69 ///   ...
70 ///
71 /// original assembly:
72 ///
73 ///   B0: 49 8b 07             mov    (%r15),%rax
74 ///       4c 89 ff             mov    %r15,%rdi
75 ///       ff 10                callq  *(%rax)
76 ///       41 83 e6 01          and    $0x1,%r14d
77 ///       4d 89 e6             mov    %r12,%r14
78 ///       4c 0f 44 f5          cmove  %rbp,%r14
79 ///       4c 89 f7             mov    %r14,%rdi
80 ///       ...
81 ///
82 /// after ICP:
83 ///
84 ///   B0: 49 8b 07             mov    (%r15),%rax
85 ///       4c 89 ff             mov    %r15,%rdi
86 ///       48 81 38 e0 0b 40 00 cmpq   $B::foo,(%rax)
87 ///       75 29                jne    B3
88 ///   B1: e8 45 03 00 00       callq  $B::foo
89 ///   B2: 41 83 e6 01          and    $0x1,%r14d
90 ///       4d 89 e6             mov    %r12,%r14
91 ///       4c 0f 44 f5          cmove  %rbp,%r14
92 ///       4c 89 f7             mov    %r14,%rdi
93 ///       ...
94 ///
95 ///   B3: ff 10                callq  *(%rax)
96 ///       eb d6                jmp    B2
97 ///
98 class IndirectCallPromotion : public BinaryFunctionPass {
99   using BasicBlocksVector = std::vector<std::unique_ptr<BinaryBasicBlock>>;
100   using MethodInfoType = std::pair<std::vector<std::pair<MCSymbol *, uint64_t>>,
101                                    std::vector<MCInst *>>;
102   using JumpTableInfoType = std::vector<std::pair<uint64_t, uint64_t>>;
103   using SymTargetsType = std::vector<std::pair<MCSymbol *, uint64_t>>;
104   struct Location {
105     MCSymbol *Sym{nullptr};
106     uint64_t Addr{0};
isValidLocation107     bool isValid() const { return Sym || Addr != 0; }
LocationLocation108     Location() {}
LocationLocation109     explicit Location(MCSymbol *Sym) : Sym(Sym) {}
LocationLocation110     explicit Location(uint64_t Addr) : Addr(Addr) {}
111   };
112 
113   struct Callsite {
114     Location From;
115     Location To;
116     uint64_t Mispreds{0};
117     uint64_t Branches{0};
118     // Indices in the jmp table (jt only)
119     std::vector<uint64_t> JTIndices;
isValidCallsite120     bool isValid() const { return From.isValid() && To.isValid(); }
121     Callsite(BinaryFunction &BF, const IndirectCallProfile &ICP);
CallsiteCallsite122     Callsite(const Location &From, const Location &To, uint64_t Mispreds,
123              uint64_t Branches, uint64_t JTIndex)
124         : From(From), To(To), Mispreds(Mispreds), Branches(Branches),
125           JTIndices(1, JTIndex) {}
126   };
127 
128   std::unordered_set<const BinaryFunction *> Modified;
129   // Total number of calls from all callsites.
130   uint64_t TotalCalls{0};
131 
132   // Total number of indirect calls from all callsites.
133   // (a fraction of TotalCalls)
134   uint64_t TotalIndirectCalls{0};
135 
136   // Total number of jmp table calls from all callsites.
137   // (a fraction of TotalCalls)
138   uint64_t TotalIndirectJmps{0};
139 
140   // Total number of callsites that use indirect calls.
141   // (the total number of callsites is not recorded)
142   uint64_t TotalIndirectCallsites{0};
143 
144   // Total number of callsites that are jump tables.
145   uint64_t TotalJumpTableCallsites{0};
146 
147   // Total number of indirect callsites that are optimized by ICP.
148   // (a fraction of TotalIndirectCallsites)
149   uint64_t TotalOptimizedIndirectCallsites{0};
150 
151   // Total number of method callsites that can have loads eliminated.
152   mutable uint64_t TotalMethodLoadEliminationCandidates{0};
153 
154   // Total number of method callsites that had loads eliminated.
155   uint64_t TotalMethodLoadsEliminated{0};
156 
157   // Total number of jump table callsites that are optimized by ICP.
158   uint64_t TotalOptimizedJumpTableCallsites{0};
159 
160   // Total number of indirect calls that are optimized by ICP.
161   // (a fraction of TotalCalls)
162   uint64_t TotalNumFrequentCalls{0};
163 
164   // Total number of jump table calls that are optimized by ICP.
165   // (a fraction of TotalCalls)
166   uint64_t TotalNumFrequentJmps{0};
167 
168   // Total number of jump table sites that can use hot indices.
169   mutable uint64_t TotalIndexBasedCandidates{0};
170 
171   // Total number of jump table sites that use hot indices.
172   uint64_t TotalIndexBasedJumps{0};
173 
174   void printDecision(llvm::raw_ostream &OS,
175                      std::vector<IndirectCallPromotion::Callsite> &Targets,
176                      unsigned N) const;
177 
178   std::vector<Callsite> getCallTargets(BinaryBasicBlock &BB,
179                                        const MCInst &Inst) const;
180 
181   size_t canPromoteCallsite(const BinaryBasicBlock &BB, const MCInst &Inst,
182                             const std::vector<Callsite> &Targets,
183                             uint64_t NumCalls);
184 
185   void printCallsiteInfo(const BinaryBasicBlock &BB, const MCInst &Inst,
186                          const std::vector<Callsite> &Targets, const size_t N,
187                          uint64_t NumCalls) const;
188 
189   JumpTableInfoType maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
190                                                 MCInst &Inst,
191                                                 MCInst *&TargetFetchInst,
192                                                 const JumpTable *JT) const;
193 
194   SymTargetsType findCallTargetSymbols(std::vector<Callsite> &Targets,
195                                        size_t &N, BinaryBasicBlock &BB,
196                                        MCInst &Inst,
197                                        MCInst *&TargetFetchInst) const;
198 
199   MethodInfoType maybeGetVtableSyms(BinaryBasicBlock &BB, MCInst &Inst,
200                                     const SymTargetsType &SymTargets) const;
201 
202   std::vector<std::unique_ptr<BinaryBasicBlock>>
203   rewriteCall(BinaryBasicBlock &IndCallBlock, const MCInst &CallInst,
204               MCPlusBuilder::BlocksVectorTy &&ICPcode,
205               const std::vector<MCInst *> &MethodFetchInsns) const;
206 
207   BinaryBasicBlock *fixCFG(BinaryBasicBlock &IndCallBlock,
208                            const bool IsTailCall, const bool IsJumpTable,
209                            BasicBlocksVector &&NewBBs,
210                            const std::vector<Callsite> &Targets) const;
211 
212 public:
IndirectCallPromotion(const cl::opt<bool> & PrintPass)213   explicit IndirectCallPromotion(const cl::opt<bool> &PrintPass)
214       : BinaryFunctionPass(PrintPass) {}
215 
getName()216   const char *getName() const override { return "indirect-call-promotion"; }
shouldPrint(const BinaryFunction & BF)217   bool shouldPrint(const BinaryFunction &BF) const override {
218     return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
219   }
shouldOptimize(const BinaryFunction & BF)220   bool shouldOptimize(const BinaryFunction &BF) const override {
221     return BF.isSimple() && !BF.isIgnored() && BF.hasProfile() &&
222            !BF.hasUnknownControlFlow();
223   }
224   Error runOnFunctions(BinaryContext &BC) override;
225 };
226 
227 } // namespace bolt
228 } // namespace llvm
229 
230 #endif
231