1 //===- bolt/Passes/FrameOptimizer.h -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef BOLT_PASSES_FRAMEOPTIMIZER_H 10 #define BOLT_PASSES_FRAMEOPTIMIZER_H 11 12 #include "bolt/Passes/BinaryPasses.h" 13 14 namespace llvm { 15 namespace bolt { 16 class FrameAnalysis; 17 class RegAnalysis; 18 19 /// FrameOptimizerPass strives for removing or moving stack frame accesses to 20 /// less frequently executed basic blocks, reducing the pressure on icache 21 /// usage as well as dynamic instruction count. 22 /// 23 /// This is accomplished by analyzing both caller-saved register spills and 24 /// callee-saved register spills. This class handles the former while delegating 25 /// the latter to the class ShrinkWrapping. We discuss caller-saved register 26 /// spills optimization below. 27 /// 28 /// Caller-saved registers must be conservatively pushed to the stack because 29 /// the callee may write to these registers. If we can prove the callee will 30 /// never touch these registers, we can remove this spill. 31 /// 32 /// This optimization analyzes the call graph and first computes the set of 33 /// registers that may get overwritten when executing a function (this includes 34 /// the set of registers touched by all functions this function may call during 35 /// its execution) -- see the FrameAnalysis class for implementation details. 36 /// 37 /// The second step is to perform an analysis to disambiguate which stack 38 /// position is being accessed by each load/store instruction -- see the 39 /// FrameAnalysis class. 40 /// 41 /// The third step performs a forward dataflow analysis, using intersection as 42 /// the confluence operator, to propagate information about available 43 /// stack definitions at each point of the program. See the 44 /// StackAvailableExpressions class. This definition shows an equivalence 45 /// between the value in a stack position and the value of a register or 46 /// immediate. To have those preserved, both register and the value in the stack 47 /// position cannot be touched by another instruction. 48 /// These definitions we are tracking occur in the form: 49 /// 50 /// stack def: MEM[FRAME - 0x5c] <= RAX 51 /// 52 /// Any instruction that writes to RAX will kill this definition, meaning RAX 53 /// cannot be used to recover the same value that is in FRAME - 0x5c. Any memory 54 /// write instruction to FRAME - 0x5c will also kill this definition. 55 /// 56 /// If such a definition is available at an instruction that loads from this 57 /// frame offset, we have detected a redundant load. For example, if the 58 /// previous stack definition is available at the following instruction, this 59 /// is an example of a redundant stack load: 60 /// 61 /// stack load: RAX <= MEM[FRAME - 0x5c] 62 /// 63 /// The fourth step will use this info to actually modify redundant loads. In 64 /// our running example, we would change the stack load to the following reg 65 /// move: 66 /// 67 /// RAX <= RAX // can be deleted 68 /// 69 /// In this example, since the store source register is the same as the load 70 /// destination register, this creates a redundant MOV that can be deleted. 71 /// 72 /// Finally, another analysis propagates information about which instructions 73 /// are using (loading from) a stack position -- see StackReachingUses. If a 74 /// store sees no use of the value it is storing, it is eliminated. 75 /// 76 class FrameOptimizerPass : public BinaryFunctionPass { 77 /// Stats aggregating variables 78 uint64_t NumRedundantLoads{0}; 79 uint64_t NumRedundantStores{0}; 80 uint64_t FreqRedundantLoads{0}; 81 uint64_t FreqRedundantStores{0}; 82 uint64_t FreqLoadsChangedToReg{0}; 83 uint64_t FreqLoadsChangedToImm{0}; 84 uint64_t NumLoadsDeleted{0}; 85 uint64_t FreqLoadsDeleted{0}; 86 87 DenseSet<const BinaryFunction *> FuncsChanged; 88 89 std::mutex FuncsChangedMutex; 90 91 /// Perform a dataflow analysis in \p BF to reveal unnecessary reloads from 92 /// the frame. Use the analysis to convert memory loads to register moves or 93 /// immediate loads. Delete redundant register moves. 94 void removeUnnecessaryLoads(const RegAnalysis &RA, const FrameAnalysis &FA, 95 BinaryFunction &BF); 96 97 /// Use information from stack frame usage to delete unused stores. 98 void removeUnusedStores(const FrameAnalysis &FA, BinaryFunction &BF); 99 100 /// Perform shrinkwrapping step 101 Error performShrinkWrapping(const RegAnalysis &RA, const FrameAnalysis &FA, 102 BinaryContext &BC); 103 104 public: FrameOptimizerPass(const cl::opt<bool> & PrintPass)105 explicit FrameOptimizerPass(const cl::opt<bool> &PrintPass) 106 : BinaryFunctionPass(PrintPass) {} 107 getName()108 const char *getName() const override { return "frame-optimizer"; } 109 110 /// Pass entry point 111 Error runOnFunctions(BinaryContext &BC) override; 112 shouldPrint(const BinaryFunction & BF)113 bool shouldPrint(const BinaryFunction &BF) const override { 114 return BinaryFunctionPass::shouldPrint(BF) && FuncsChanged.count(&BF) > 0; 115 } 116 }; 117 118 } // namespace bolt 119 120 } // namespace llvm 121 122 #endif 123