xref: /llvm-project/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp (revision a34c753fe709a624f5b087397fb05adeac2311e4)
1 //=  InstrumentationRuntimeLibrary.cpp - The Instrumentation Runtime Library =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/JumpTable.h"
14 #include "bolt/Utils/CommandLineOpts.h"
15 #include "llvm/ExecutionEngine/RuntimeDyld.h"
16 #include "llvm/MC/MCStreamer.h"
17 #include "llvm/Support/Alignment.h"
18 #include "llvm/Support/CommandLine.h"
19 
20 using namespace llvm;
21 using namespace bolt;
22 
23 namespace opts {
24 
25 cl::opt<std::string> RuntimeInstrumentationLib(
26     "runtime-instrumentation-lib",
27     cl::desc("specify file name of the runtime instrumentation library"),
28     cl::ZeroOrMore, cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory));
29 
30 extern cl::opt<bool> InstrumentationFileAppendPID;
31 extern cl::opt<bool> ConservativeInstrumentation;
32 extern cl::opt<std::string> InstrumentationFilename;
33 extern cl::opt<std::string> InstrumentationBinpath;
34 extern cl::opt<uint32_t> InstrumentationSleepTime;
35 extern cl::opt<bool> InstrumentationNoCountersClear;
36 extern cl::opt<bool> InstrumentationWaitForks;
37 extern cl::opt<JumpTableSupportLevel> JumpTables;
38 
39 } // namespace opts
40 
41 void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
42     const BinaryContext &BC) const {
43   if (!BC.HasRelocations) {
44     errs() << "BOLT-ERROR: instrumentation runtime libraries require "
45               "relocations\n";
46     exit(1);
47   }
48   if (opts::JumpTables != JTS_MOVE) {
49     opts::JumpTables = JTS_MOVE;
50     outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
51   }
52   if (!BC.StartFunctionAddress) {
53     errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
54               "entry point of "
55               "the input binary\n";
56     exit(1);
57   }
58   if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
59     errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
60               "section but instrumentation currently relies on patching "
61               "DT_FINI to write the profile\n";
62     exit(1);
63   }
64 }
65 
66 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
67                                                MCStreamer &Streamer) {
68   MCSection *Section = BC.isELF()
69                            ? static_cast<MCSection *>(BC.Ctx->getELFSection(
70                                  ".bolt.instr.counters", ELF::SHT_PROGBITS,
71                                  BinarySection::getFlags(/*IsReadOnly=*/false,
72                                                          /*IsText=*/false,
73                                                          /*IsAllocatable=*/true)
74 
75                                      ))
76                            : static_cast<MCSection *>(BC.Ctx->getMachOSection(
77                                  "__BOLT", "__counters", MachO::S_REGULAR,
78                                  SectionKind::getData()));
79 
80   if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) {
81     errs() << "BOLT-ERROR: instrumentation of static binary currently does not "
82               "support profile output on binary finalization, so it "
83               "requires -instrumentation-sleep-time=N (N>0) usage\n";
84     exit(1);
85   }
86 
87   Section->setAlignment(llvm::Align(BC.RegularPageSize));
88   Streamer.SwitchSection(Section);
89 
90   // EmitOffset is used to determine padding size for data alignment
91   uint64_t EmitOffset = 0;
92 
93   auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) {
94     Streamer.emitLabel(Symbol);
95     if (IsGlobal)
96       Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global);
97   };
98 
99   auto emitLabelByName = [&BC, emitLabel](StringRef Name,
100                                           bool IsGlobal = true) {
101     MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name);
102     emitLabel(Symbol, IsGlobal);
103   };
104 
105   auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) {
106     const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset;
107     if (Padding) {
108       Streamer.emitFill(Padding, 0);
109       EmitOffset += Padding;
110     }
111   };
112 
113   auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; };
114 
115   auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) {
116     emitPadding(Size);
117     emitDataSize(Size);
118   };
119 
120   auto emitFill = [&Streamer, emitDataSize,
121                    emitLabel](unsigned Size, MCSymbol *Symbol = nullptr,
122                               uint8_t Byte = 0) {
123     emitDataSize(Size);
124     if (Symbol)
125       emitLabel(Symbol, /*IsGlobal*/ false);
126     Streamer.emitFill(Size, Byte);
127   };
128 
129   auto emitValue = [&BC, &Streamer, emitDataPadding,
130                     emitLabel](MCSymbol *Symbol, const MCExpr *Value) {
131     const unsigned Psize = BC.AsmInfo->getCodePointerSize();
132     emitDataPadding(Psize);
133     emitLabel(Symbol);
134     if (Value)
135       Streamer.emitValue(Value, Psize);
136     else
137       Streamer.emitFill(Psize, 0);
138   };
139 
140   auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName](
141                           StringRef Name, uint64_t Value, unsigned Size = 4) {
142     emitDataPadding(Size);
143     emitLabelByName(Name);
144     Streamer.emitIntValue(Value, Size);
145   };
146 
147   auto emitString = [&Streamer, emitDataSize, emitLabelByName,
148                      emitFill](StringRef Name, StringRef Contents) {
149     emitDataSize(Contents.size());
150     emitLabelByName(Name);
151     Streamer.emitBytes(Contents);
152     emitFill(1);
153   };
154 
155   // All of the following symbols will be exported as globals to be used by the
156   // instrumentation runtime library to dump the instrumentation data to disk.
157   // Label marking start of the memory region containing instrumentation
158   // counters, total vector size is Counters.size() 8-byte counters
159   emitLabelByName("__bolt_instr_locations");
160   for (MCSymbol *const &Label : Summary->Counters)
161     emitFill(sizeof(uint64_t), Label);
162 
163   emitPadding(BC.RegularPageSize);
164   emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
165   emitIntValue("__bolt_instr_no_counters_clear",
166                !!opts::InstrumentationNoCountersClear, 1);
167   emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation,
168                1);
169   emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
170   emitIntValue("__bolt_num_counters", Summary->Counters.size());
171   emitValue(Summary->IndCallCounterFuncPtr, nullptr);
172   emitValue(Summary->IndTailCallCounterFuncPtr, nullptr);
173   emitIntValue("__bolt_instr_num_ind_calls",
174                Summary->IndCallDescriptions.size());
175   emitIntValue("__bolt_instr_num_ind_targets",
176                Summary->IndCallTargetDescriptions.size());
177   emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size());
178   emitString("__bolt_instr_filename", opts::InstrumentationFilename);
179   emitString("__bolt_instr_binpath", opts::InstrumentationBinpath);
180   emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1);
181 
182   if (BC.isMachO()) {
183     MCSection *TablesSection = BC.Ctx->getMachOSection(
184                                  "__BOLT", "__tables", MachO::S_REGULAR,
185                                  SectionKind::getData());
186     TablesSection->setAlignment(llvm::Align(BC.RegularPageSize));
187     Streamer.SwitchSection(TablesSection);
188     emitString("__bolt_instr_tables", buildTables(BC));
189   }
190 }
191 
192 void InstrumentationRuntimeLibrary::link(
193     BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld,
194     std::function<void(RuntimeDyld &)> OnLoad) {
195   std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib);
196   loadLibrary(LibPath, RTDyld);
197   OnLoad(RTDyld);
198   RTDyld.finalizeWithMemoryManagerLocking();
199   if (RTDyld.hasError()) {
200     outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld.getErrorString() << "\n";
201     exit(1);
202   }
203 
204   if (BC.isMachO())
205     return;
206 
207   RuntimeFiniAddress = RTDyld.getSymbol("__bolt_instr_fini").getAddress();
208   if (!RuntimeFiniAddress) {
209     errs() << "BOLT-ERROR: instrumentation library does not define "
210               "__bolt_instr_fini: "
211            << LibPath << "\n";
212     exit(1);
213   }
214   RuntimeStartAddress = RTDyld.getSymbol("__bolt_instr_start").getAddress();
215   if (!RuntimeStartAddress) {
216     errs() << "BOLT-ERROR: instrumentation library does not define "
217               "__bolt_instr_start: "
218            << LibPath << "\n";
219     exit(1);
220   }
221   outs() << "BOLT-INFO: output linked against instrumentation runtime "
222             "library, lib entry point is 0x"
223          << Twine::utohexstr(RuntimeFiniAddress) << "\n";
224   outs() << "BOLT-INFO: clear procedure is 0x"
225          << Twine::utohexstr(
226                 RTDyld.getSymbol("__bolt_instr_clear_counters").getAddress())
227          << "\n";
228 
229   emitTablesAsELFNote(BC);
230 }
231 
232 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) {
233   std::string TablesStr;
234   raw_string_ostream OS(TablesStr);
235 
236   // This is sync'ed with runtime/instr.cpp:readDescriptions()
237   auto getOutputAddress = [](const BinaryFunction &Func,
238                              uint64_t Offset) -> uint64_t {
239     return Offset == 0
240                ? Func.getOutputAddress()
241                : Func.translateInputToOutputAddress(Func.getAddress() + Offset);
242   };
243 
244   // Indirect targets need to be sorted for fast lookup during runtime
245   std::sort(Summary->IndCallTargetDescriptions.begin(),
246             Summary->IndCallTargetDescriptions.end(),
247             [&](const IndCallTargetDescription &A,
248                 const IndCallTargetDescription &B) {
249               return getOutputAddress(*A.Target, A.ToLoc.Offset) <
250                      getOutputAddress(*B.Target, B.ToLoc.Offset);
251             });
252 
253   // Start of the vector with descriptions (one CounterDescription for each
254   // counter), vector size is Counters.size() CounterDescription-sized elmts
255   const size_t IDSize =
256       Summary->IndCallDescriptions.size() * sizeof(IndCallDescription);
257   OS.write(reinterpret_cast<const char *>(&IDSize), 4);
258   for (const IndCallDescription &Desc : Summary->IndCallDescriptions) {
259     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4);
260     OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4);
261   }
262 
263   const size_t ITDSize = Summary->IndCallTargetDescriptions.size() *
264                          sizeof(IndCallTargetDescription);
265   OS.write(reinterpret_cast<const char *>(&ITDSize), 4);
266   for (const IndCallTargetDescription &Desc :
267        Summary->IndCallTargetDescriptions) {
268     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4);
269     OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4);
270     uint64_t TargetFuncAddress =
271         getOutputAddress(*Desc.Target, Desc.ToLoc.Offset);
272     OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
273   }
274 
275   uint32_t FuncDescSize = Summary->getFDSize();
276   OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4);
277   for (const FunctionDescription &Desc : Summary->FunctionDescriptions) {
278     const size_t LeafNum = Desc.LeafNodes.size();
279     OS.write(reinterpret_cast<const char *>(&LeafNum), 4);
280     for (const InstrumentedNode &LeafNode : Desc.LeafNodes) {
281       OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4);
282       OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4);
283     }
284     const size_t EdgesNum = Desc.Edges.size();
285     OS.write(reinterpret_cast<const char *>(&EdgesNum), 4);
286     for (const EdgeDescription &Edge : Desc.Edges) {
287       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4);
288       OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4);
289       OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4);
290       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4);
291       OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4);
292       OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4);
293       OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4);
294     }
295     const size_t CallsNum = Desc.Calls.size();
296     OS.write(reinterpret_cast<const char *>(&CallsNum), 4);
297     for (const CallDescription &Call : Desc.Calls) {
298       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4);
299       OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4);
300       OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4);
301       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4);
302       OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4);
303       OS.write(reinterpret_cast<const char *>(&Call.Counter), 4);
304       uint64_t TargetFuncAddress =
305           getOutputAddress(*Call.Target, Call.ToLoc.Offset);
306       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
307     }
308     const size_t EntryNum = Desc.EntryNodes.size();
309     OS.write(reinterpret_cast<const char *>(&EntryNum), 4);
310     for (const EntryNode &EntryNode : Desc.EntryNodes) {
311       OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8);
312       uint64_t TargetFuncAddress =
313           getOutputAddress(*Desc.Function, EntryNode.Address);
314       OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8);
315     }
316   }
317   // Our string table lives immediately after descriptions vector
318   OS << Summary->StringTable;
319   OS.flush();
320 
321   return TablesStr;
322 }
323 
324 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) {
325   std::string TablesStr = buildTables(BC);
326   const std::string BoltInfo = BinarySection::encodeELFNote(
327       "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES);
328   BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo),
329                                  BoltInfo.size(),
330                                  /*Alignment=*/1,
331                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
332 }
333