xref: /llvm-project/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp (revision 0a5edb4de408ae0405f85c3e4c6da5233f185f63)
1 //===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the InstrumentationRuntimeLibrary class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/JumpTable.h"
16 #include "bolt/Core/Linker.h"
17 #include "bolt/Utils/CommandLineOpts.h"
18 #include "llvm/MC/MCStreamer.h"
19 #include "llvm/Support/Alignment.h"
20 #include "llvm/Support/CommandLine.h"
21 
22 using namespace llvm;
23 using namespace bolt;
24 
25 namespace opts {
26 
27 cl::opt<std::string> RuntimeInstrumentationLib(
28     "runtime-instrumentation-lib",
29     cl::desc("specify path of the runtime instrumentation library"),
30     cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory));
31 
32 extern cl::opt<bool> InstrumentationFileAppendPID;
33 extern cl::opt<bool> ConservativeInstrumentation;
34 extern cl::opt<std::string> InstrumentationFilename;
35 extern cl::opt<std::string> InstrumentationBinpath;
36 extern cl::opt<uint32_t> InstrumentationSleepTime;
37 extern cl::opt<bool> InstrumentationNoCountersClear;
38 extern cl::opt<bool> InstrumentationWaitForks;
39 extern cl::opt<JumpTableSupportLevel> JumpTables;
40 
41 } // namespace opts
42 
43 void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
44     const BinaryContext &BC) const {
45   if (!BC.HasRelocations) {
46     errs() << "BOLT-ERROR: instrumentation runtime libraries require "
47               "relocations\n";
48     exit(1);
49   }
50   if (opts::JumpTables != JTS_MOVE) {
51     opts::JumpTables = JTS_MOVE;
52     outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
53   }
54   if (!BC.StartFunctionAddress) {
55     errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
56               "entry point of "
57               "the input binary\n";
58     exit(1);
59   }
60 
61   if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) {
62     errs() << "BOLT-ERROR: instrumentation of static binary currently does not "
63               "support profile output on binary finalization, so it "
64               "requires -instrumentation-sleep-time=N (N>0) usage\n";
65     exit(1);
66   }
67 
68   if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) &&
69       opts::InstrumentationFileAppendPID) {
70     errs()
71         << "BOLT-ERROR: instrumentation-file-append-pid is not compatible with "
72            "instrumentation-sleep-time and instrumentation-wait-forks. If you "
73            "want a separate profile for each fork, it can only be dumped in "
74            "the end of process when instrumentation-file-append-pid is used.\n";
75     exit(1);
76   }
77 }
78 
79 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
80                                                MCStreamer &Streamer) {
81   MCSection *Section = BC.isELF()
82                            ? static_cast<MCSection *>(BC.Ctx->getELFSection(
83                                  ".bolt.instr.counters", ELF::SHT_PROGBITS,
84                                  BinarySection::getFlags(/*IsReadOnly=*/false,
85                                                          /*IsText=*/false,
86                                                          /*IsAllocatable=*/true)
87 
88                                      ))
89                            : static_cast<MCSection *>(BC.Ctx->getMachOSection(
90                                  "__BOLT", "__counters", MachO::S_REGULAR,
91                                  SectionKind::getData()));
92 
93   Section->setAlignment(llvm::Align(BC.RegularPageSize));
94   Streamer.switchSection(Section);
95 
96   // EmitOffset is used to determine padding size for data alignment
97   uint64_t EmitOffset = 0;
98 
99   auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) {
100     Streamer.emitLabel(Symbol);
101     if (IsGlobal)
102       Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global);
103   };
104 
105   auto emitLabelByName = [&BC, emitLabel](StringRef Name,
106                                           bool IsGlobal = true) {
107     MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name);
108     emitLabel(Symbol, IsGlobal);
109   };
110 
111   auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) {
112     const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset;
113     if (Padding) {
114       Streamer.emitFill(Padding, 0);
115       EmitOffset += Padding;
116     }
117   };
118 
119   auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; };
120 
121   auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) {
122     emitPadding(Size);
123     emitDataSize(Size);
124   };
125 
126   auto emitFill = [&Streamer, emitDataSize,
127                    emitLabel](unsigned Size, MCSymbol *Symbol = nullptr,
128                               uint8_t Byte = 0) {
129     emitDataSize(Size);
130     if (Symbol)
131       emitLabel(Symbol, /*IsGlobal*/ false);
132     Streamer.emitFill(Size, Byte);
133   };
134 
135   auto emitValue = [&BC, &Streamer, emitDataPadding,
136                     emitLabel](MCSymbol *Symbol, const MCExpr *Value) {
137     const unsigned Psize = BC.AsmInfo->getCodePointerSize();
138     emitDataPadding(Psize);
139     emitLabel(Symbol);
140     if (Value)
141       Streamer.emitValue(Value, Psize);
142     else
143       Streamer.emitFill(Psize, 0);
144   };
145 
146   auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
147                           StringRef Name, uint64_t Value, unsigned Size = 4) {
148     emitDataPadding(Size);
149     emitLabelByName(Name);
150     Streamer.emitIntValue(Value, Size);
151   };
152 
153   auto emitString = [&Streamer, emitDataSize, emitLabelByName,
154                      emitFill](StringRef Name, StringRef Contents) {
155     emitDataSize(Contents.size());
156     emitLabelByName(Name);
157     Streamer.emitBytes(Contents);
158     emitFill(1);
159   };
160 
161   // All of the following symbols will be exported as globals to be used by the
162   // instrumentation runtime library to dump the instrumentation data to disk.
163   // Label marking start of the memory region containing instrumentation
164   // counters, total vector size is Counters.size() 8-byte counters
165   emitLabelByName("__bolt_instr_locations");
166   for (MCSymbol *const &Label : Summary->Counters)
167     emitFill(sizeof(uint64_t), Label);
168 
169   emitPadding(BC.RegularPageSize);
170   emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
171   emitIntValue("__bolt_instr_no_counters_clear",
172                !!opts::InstrumentationNoCountersClear, 1);
173   emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation,
174                1);
175   emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
176   emitIntValue("__bolt_num_counters", Summary->Counters.size());
177   emitValue(Summary->IndCallCounterFuncPtr, nullptr);
178   emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
179   emitIntValue("__bolt_instr_num_ind_calls",
180                Summary->IndCallDescriptions.size());
181   emitIntValue("__bolt_instr_num_ind_targets",
182                Summary->IndCallTargetDescriptions.size());
183   emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
184   emitString("__bolt_instr_filename", opts::InstrumentationFilename);
185   emitString("__bolt_instr_binpath", opts::InstrumentationBinpath);
186   emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
187 
188   if (BC.isMachO()) {
189     MCSection *TablesSection = BC.Ctx->getMachOSection(
190         "__BOLT", "__tables", MachO::S_REGULAR, SectionKind::getData());
191     TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
192     Streamer.switchSection(TablesSection);
193     emitString("__bolt_instr_tables", buildTables(BC));
194   }
195 }
196 
197 void InstrumentationRuntimeLibrary::link(
198     BinaryContext &BC, StringRef ToolPath, BOLTLinker &Linker,
199     BOLTLinker::SectionsMapper MapSections) {
200   std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib);
201   loadLibrary(LibPath, Linker, MapSections);
202 
203   if (BC.isMachO())
204     return;
205 
206   RuntimeFiniAddress = Linker.lookupSymbol("__bolt_instr_fini").value_or(0);
207   if (!RuntimeFiniAddress) {
208     errs() << "BOLT-ERROR: instrumentation library does not define "
209               "__bolt_instr_fini: "
210            << LibPath << "\n";
211     exit(1);
212   }
213   RuntimeStartAddress = Linker.lookupSymbol("__bolt_instr_start").value_or(0);
214   if (!RuntimeStartAddress) {
215     errs() << "BOLT-ERROR: instrumentation library does not define "
216               "__bolt_instr_start: "
217            << LibPath << "\n";
218     exit(1);
219   }
220   outs() << "BOLT-INFO: output linked against instrumentation runtime "
221             "library, lib entry point is 0x"
222          << Twine::utohexstr(RuntimeFiniAddress) << "\n";
223   outs() << "BOLT-INFO: clear procedure is 0x"
224          << Twine::utohexstr(
225                 Linker.lookupSymbol("__bolt_instr_clear_counters").value_or(0))
226          << "\n";
227 
228   emitTablesAsELFNote(BC);
229 }
230 
231 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) {
232   std::string TablesStr;
233   raw_string_ostream OS(TablesStr);
234 
235   // This is sync'ed with runtime/instr.cpp:readDescriptions()
236   auto getOutputAddress = [](const BinaryFunction &Func,
237                              uint64_t Offset) -> uint64_t {
238     return Offset == 0
239                ? Func.getOutputAddress()
240                : Func.translateInputToOutputAddress(Func.getAddress() + Offset);
241   };
242 
243   // Indirect targets need to be sorted for fast lookup during runtime
244   llvm::sort(Summary->IndCallTargetDescriptions,
245              [&](const IndCallTargetDescription &A,
246                  const IndCallTargetDescription &B) {
247                return getOutputAddress(*A.Target, A.ToLoc.Offset) <
248                       getOutputAddress(*B.Target, B.ToLoc.Offset);
249              });
250 
251   // Start of the vector with descriptions (one CounterDescription for each
252   // counter), vector size is Counters.size() CounterDescription-sized elmts
253   const size_t IDSize =
254       Summary->IndCallDescriptions.size() * sizeof(IndCallDescription);
255   OS.write(reinterpret_cast<const char *>(&IDSize), 4);
256   for (const IndCallDescription &Desc : Summary->IndCallDescriptions) {
257     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4);
258     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4);
259   }
260 
261   const size_t ITDSize = Summary->IndCallTargetDescriptions.size() *
262                          sizeof(IndCallTargetDescription);
263   OS.write(reinterpret_cast<const char *>(&ITDSize), 4);
264   for (const IndCallTargetDescription &Desc :
265        Summary->IndCallTargetDescriptions) {
266     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4);
267     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4);
268     uint64_t TargetFuncAddress =
269         getOutputAddress(*Desc.Target, Desc.ToLoc.Offset);
270     OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
271   }
272 
273   uint32_t FuncDescSize = Summary->getFDSize();
274   OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4);
275   for (const FunctionDescription &Desc : Summary->FunctionDescriptions) {
276     const size_t LeafNum = Desc.LeafNodes.size();
277     OS.write(reinterpret_cast<const char *>(&LeafNum), 4);
278     for (const InstrumentedNode &LeafNode : Desc.LeafNodes) {
279       OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4);
280       OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4);
281     }
282     const size_t EdgesNum = Desc.Edges.size();
283     OS.write(reinterpret_cast<const char *>(&EdgesNum), 4);
284     for (const EdgeDescription &Edge : Desc.Edges) {
285       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4);
286       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4);
287       OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4);
288       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4);
289       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4);
290       OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4);
291       OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4);
292     }
293     const size_t CallsNum = Desc.Calls.size();
294     OS.write(reinterpret_cast<const char *>(&CallsNum), 4);
295     for (const CallDescription &Call : Desc.Calls) {
296       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4);
297       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4);
298       OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4);
299       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4);
300       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4);
301       OS.write(reinterpret_cast<const char *>(&Call.Counter), 4);
302       uint64_t TargetFuncAddress =
303           getOutputAddress(*Call.Target, Call.ToLoc.Offset);
304       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
305     }
306     const size_t EntryNum = Desc.EntryNodes.size();
307     OS.write(reinterpret_cast<const char *>(&EntryNum), 4);
308     for (const EntryNode &EntryNode : Desc.EntryNodes) {
309       OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8);
310       uint64_t TargetFuncAddress =
311           getOutputAddress(*Desc.Function, EntryNode.Address);
312       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
313     }
314   }
315   // Our string table lives immediately after descriptions vector
316   OS << Summary->StringTable;
317 
318   return TablesStr;
319 }
320 
321 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) {
322   std::string TablesStr = buildTables(BC);
323   const std::string BoltInfo = BinarySection::encodeELFNote(
324       "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES);
325   BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo),
326                                  BoltInfo.size(),
327                                  /*Alignment=*/1,
328                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
329 }
330