xref: /llvm-project/bolt/lib/Passes/Aligner.cpp (revision a5f3d1a803020167bd9d494a8a3921e7dcc1550a)
1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AlignerPass class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/Aligner.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 
16 #define DEBUG_TYPE "bolt-aligner"
17 
18 using namespace llvm;
19 
20 namespace opts {
21 
22 extern cl::OptionCategory BoltOptCategory;
23 
24 extern cl::opt<bool> AlignBlocks;
25 extern cl::opt<bool> PreserveBlocksAlignment;
26 extern cl::opt<unsigned> AlignFunctions;
27 
28 cl::opt<unsigned>
29 AlignBlocksMinSize("align-blocks-min-size",
30   cl::desc("minimal size of the basic block that should be aligned"),
31   cl::init(0),
32   cl::ZeroOrMore,
33   cl::Hidden,
34   cl::cat(BoltOptCategory));
35 
36 cl::opt<unsigned> AlignBlocksThreshold(
37     "align-blocks-threshold",
38     cl::desc(
39         "align only blocks with frequency larger than containing function "
40         "execution frequency specified in percent. E.g. 1000 means aligning "
41         "blocks that are 10 times more frequently executed than the "
42         "containing function."),
43     cl::init(800), cl::Hidden, cl::cat(BoltOptCategory));
44 
45 cl::opt<unsigned> AlignFunctionsMaxBytes(
46     "align-functions-max-bytes",
47     cl::desc("maximum number of bytes to use to align functions"), cl::init(32),
48     cl::cat(BoltOptCategory));
49 
50 cl::opt<unsigned>
51 BlockAlignment("block-alignment",
52   cl::desc("boundary to use for alignment of basic blocks"),
53   cl::init(16),
54   cl::ZeroOrMore,
55   cl::cat(BoltOptCategory));
56 
57 cl::opt<bool>
58     UseCompactAligner("use-compact-aligner",
59                       cl::desc("Use compact approach for aligning functions"),
60                       cl::init(true), cl::cat(BoltOptCategory));
61 
62 } // end namespace opts
63 
64 namespace llvm {
65 namespace bolt {
66 
67 // Align function to the specified byte-boundary (typically, 64) offsetting
68 // the fuction by not more than the corresponding value
alignMaxBytes(BinaryFunction & Function)69 static void alignMaxBytes(BinaryFunction &Function) {
70   Function.setAlignment(opts::AlignFunctions);
71   Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
72   Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
73 }
74 
75 // Align function to the specified byte-boundary (typically, 64) offsetting
76 // the fuction by not more than the minimum over
77 // -- the size of the function
78 // -- the specified number of bytes
alignCompact(BinaryFunction & Function,const MCCodeEmitter * Emitter)79 static void alignCompact(BinaryFunction &Function,
80                          const MCCodeEmitter *Emitter) {
81   const BinaryContext &BC = Function.getBinaryContext();
82   size_t HotSize = 0;
83   size_t ColdSize = 0;
84 
85   for (const BinaryBasicBlock &BB : Function)
86     if (BB.isSplit())
87       ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
88     else
89       HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
90 
91   Function.setAlignment(opts::AlignFunctions);
92   if (HotSize > 0)
93     Function.setMaxAlignmentBytes(
94       std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));
95 
96   // using the same option, max-align-bytes, both for cold and hot parts of the
97   // functions, as aligning cold functions typically does not affect performance
98   if (ColdSize > 0)
99     Function.setMaxColdAlignmentBytes(
100       std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
101 }
102 
alignBlocks(BinaryFunction & Function,const MCCodeEmitter * Emitter)103 void AlignerPass::alignBlocks(BinaryFunction &Function,
104                               const MCCodeEmitter *Emitter) {
105   if (!Function.hasValidProfile() || !Function.isSimple())
106     return;
107 
108   const BinaryContext &BC = Function.getBinaryContext();
109 
110   const uint64_t FuncCount =
111       std::max<uint64_t>(1, Function.getKnownExecutionCount());
112   BinaryBasicBlock *PrevBB = nullptr;
113   for (BinaryBasicBlock *BB : Function.getLayout().blocks()) {
114     uint64_t Count = BB->getKnownExecutionCount();
115 
116     if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
117       PrevBB = BB;
118       continue;
119     }
120 
121     uint64_t FTCount = 0;
122     if (PrevBB && PrevBB->getFallthrough() == BB)
123       FTCount = PrevBB->getBranchInfo(*BB).Count;
124 
125     PrevBB = BB;
126 
127     if (Count < FTCount * 2)
128       continue;
129 
130     const uint64_t BlockSize =
131         BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
132     const uint64_t BytesToUse =
133         std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);
134 
135     if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
136       continue;
137 
138     BB->setAlignment(opts::BlockAlignment);
139     BB->setAlignmentMaxBytes(BytesToUse);
140 
141     // Update stats.
142     LLVM_DEBUG(
143       std::unique_lock<llvm::sys::RWMutex> Lock(AlignHistogramMtx);
144       AlignHistogram[BytesToUse]++;
145       AlignedBlocksCount += BB->getKnownExecutionCount();
146     );
147   }
148 }
149 
runOnFunctions(BinaryContext & BC)150 Error AlignerPass::runOnFunctions(BinaryContext &BC) {
151   if (!BC.HasRelocations)
152     return Error::success();
153 
154   AlignHistogram.resize(opts::BlockAlignment);
155 
156   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
157     // Create a separate MCCodeEmitter to allow lock free execution
158     BinaryContext::IndependentCodeEmitter Emitter =
159         BC.createIndependentMCCodeEmitter();
160 
161     if (opts::UseCompactAligner)
162       alignCompact(BF, Emitter.MCE.get());
163     else
164       alignMaxBytes(BF);
165 
166     if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
167       alignBlocks(BF, Emitter.MCE.get());
168   };
169 
170   ParallelUtilities::runOnEachFunction(
171       BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun,
172       ParallelUtilities::PredicateTy(nullptr), "AlignerPass");
173 
174   LLVM_DEBUG(
175     dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
176     for (unsigned I = 1; I < AlignHistogram.size(); ++I)
177       dbgs() << "  " << I << " : " << AlignHistogram[I] << '\n';
178 
179     dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
180            << AlignedBlocksCount << '\n';
181   );
182   return Error::success();
183 }
184 
185 } // end namespace bolt
186 } // end namespace llvm
187