xref: /llvm-project/bolt/include/bolt/Passes/FrameOptimizer.h (revision 13d60ce2f262ef9055389908b63824e53b3054a1)
1 //===- bolt/Passes/FrameOptimizer.h -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef BOLT_PASSES_FRAMEOPTIMIZER_H
10 #define BOLT_PASSES_FRAMEOPTIMIZER_H
11 
12 #include "bolt/Passes/BinaryPasses.h"
13 
14 namespace llvm {
15 namespace bolt {
16 class FrameAnalysis;
17 class RegAnalysis;
18 
19 /// FrameOptimizerPass strives for removing or moving stack frame accesses to
20 /// less frequently executed basic blocks, reducing the pressure on icache
21 /// usage as well as dynamic instruction count.
22 ///
23 /// This is accomplished by analyzing both caller-saved register spills and
24 /// callee-saved register spills. This class handles the former while delegating
25 /// the latter to the class ShrinkWrapping. We discuss caller-saved register
26 /// spills optimization below.
27 ///
28 /// Caller-saved registers must be conservatively pushed to the stack because
29 /// the callee may write to these registers. If we can prove the callee will
30 /// never touch these registers, we can remove this spill.
31 ///
32 /// This optimization analyzes the call graph and first computes the set of
33 /// registers that may get overwritten when executing a function (this includes
34 /// the set of registers touched by all functions this function may call during
35 /// its execution) -- see the FrameAnalysis class for implementation details.
36 ///
37 /// The second step is to perform an analysis to disambiguate which stack
38 /// position is being accessed by each load/store instruction -- see the
39 /// FrameAnalysis class.
40 ///
41 /// The third step performs a forward dataflow analysis, using intersection as
42 /// the confluence operator, to propagate information about available
43 /// stack definitions at each point of the program. See the
44 /// StackAvailableExpressions class. This definition shows an equivalence
45 /// between the value in a stack position and the value of a register or
46 /// immediate. To have those preserved, both register and the value in the stack
47 /// position cannot be touched by another instruction.
48 /// These definitions we are tracking occur in the form:
49 ///
50 ///     stack def:  MEM[FRAME - 0x5c]  <= RAX
51 ///
52 /// Any instruction that writes to RAX will kill this definition, meaning RAX
53 /// cannot be used to recover the same value that is in FRAME - 0x5c. Any memory
54 /// write instruction to FRAME - 0x5c will also kill this definition.
55 ///
56 /// If such a definition is available at an instruction that loads from this
57 /// frame offset, we have detected a redundant load. For example, if the
58 /// previous stack definition is available at the following instruction, this
59 /// is an example of a redundant stack load:
60 ///
61 ///     stack load:  RAX  <= MEM[FRAME - 0x5c]
62 ///
63 /// The fourth step will use this info to actually modify redundant loads. In
64 /// our running example, we would change the stack load to the following reg
65 /// move:
66 ///
67 ///     RAX <= RAX  // can be deleted
68 ///
69 /// In this example, since the store source register is the same as the load
70 /// destination register, this creates a redundant MOV that can be deleted.
71 ///
72 /// Finally, another analysis propagates information about which instructions
73 /// are using (loading from) a stack position -- see StackReachingUses. If a
74 /// store sees no use of the value it is storing, it is eliminated.
75 ///
76 class FrameOptimizerPass : public BinaryFunctionPass {
77   /// Stats aggregating variables
78   uint64_t NumRedundantLoads{0};
79   uint64_t NumRedundantStores{0};
80   uint64_t FreqRedundantLoads{0};
81   uint64_t FreqRedundantStores{0};
82   uint64_t FreqLoadsChangedToReg{0};
83   uint64_t FreqLoadsChangedToImm{0};
84   uint64_t NumLoadsDeleted{0};
85   uint64_t FreqLoadsDeleted{0};
86 
87   DenseSet<const BinaryFunction *> FuncsChanged;
88 
89   std::mutex FuncsChangedMutex;
90 
91   /// Perform a dataflow analysis in \p BF to reveal unnecessary reloads from
92   /// the frame. Use the analysis to convert memory loads to register moves or
93   /// immediate loads. Delete redundant register moves.
94   void removeUnnecessaryLoads(const RegAnalysis &RA, const FrameAnalysis &FA,
95                               BinaryFunction &BF);
96 
97   /// Use information from stack frame usage to delete unused stores.
98   void removeUnusedStores(const FrameAnalysis &FA, BinaryFunction &BF);
99 
100   /// Perform shrinkwrapping step
101   Error performShrinkWrapping(const RegAnalysis &RA, const FrameAnalysis &FA,
102                               BinaryContext &BC);
103 
104 public:
FrameOptimizerPass(const cl::opt<bool> & PrintPass)105   explicit FrameOptimizerPass(const cl::opt<bool> &PrintPass)
106       : BinaryFunctionPass(PrintPass) {}
107 
getName()108   const char *getName() const override { return "frame-optimizer"; }
109 
110   /// Pass entry point
111   Error runOnFunctions(BinaryContext &BC) override;
112 
shouldPrint(const BinaryFunction & BF)113   bool shouldPrint(const BinaryFunction &BF) const override {
114     return BinaryFunctionPass::shouldPrint(BF) && FuncsChanged.count(&BF) > 0;
115   }
116 };
117 
118 } // namespace bolt
119 
120 } // namespace llvm
121 
122 #endif
123