1 //= InstrumentationRuntimeLibrary.cpp - The Instrumentation Runtime Library =// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "bolt/Core/JumpTable.h" 14 #include "bolt/Utils/CommandLineOpts.h" 15 #include "llvm/ExecutionEngine/RuntimeDyld.h" 16 #include "llvm/MC/MCStreamer.h" 17 #include "llvm/Support/Alignment.h" 18 #include "llvm/Support/CommandLine.h" 19 20 using namespace llvm; 21 using namespace bolt; 22 23 namespace opts { 24 25 cl::opt<std::string> RuntimeInstrumentationLib( 26 "runtime-instrumentation-lib", 27 cl::desc("specify file name of the runtime instrumentation library"), 28 cl::ZeroOrMore, cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory)); 29 30 extern cl::opt<bool> InstrumentationFileAppendPID; 31 extern cl::opt<bool> ConservativeInstrumentation; 32 extern cl::opt<std::string> InstrumentationFilename; 33 extern cl::opt<std::string> InstrumentationBinpath; 34 extern cl::opt<uint32_t> InstrumentationSleepTime; 35 extern cl::opt<bool> InstrumentationNoCountersClear; 36 extern cl::opt<bool> InstrumentationWaitForks; 37 extern cl::opt<JumpTableSupportLevel> JumpTables; 38 39 } // namespace opts 40 41 void InstrumentationRuntimeLibrary::adjustCommandLineOptions( 42 const BinaryContext &BC) const { 43 if (!BC.HasRelocations) { 44 errs() << "BOLT-ERROR: instrumentation runtime libraries require " 45 "relocations\n"; 46 exit(1); 47 } 48 if (opts::JumpTables != JTS_MOVE) { 49 opts::JumpTables = JTS_MOVE; 50 outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n"; 51 } 52 if (!BC.StartFunctionAddress) { 53 errs() << "BOLT-ERROR: instrumentation runtime libraries require a known " 54 "entry point of " 55 "the input binary\n"; 56 exit(1); 57 } 58 if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) { 59 errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic " 60 "section but instrumentation currently relies on patching " 61 "DT_FINI to write the profile\n"; 62 exit(1); 63 } 64 } 65 66 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, 67 MCStreamer &Streamer) { 68 MCSection *Section = BC.isELF() 69 ? static_cast<MCSection *>(BC.Ctx->getELFSection( 70 ".bolt.instr.counters", ELF::SHT_PROGBITS, 71 BinarySection::getFlags(/*IsReadOnly=*/false, 72 /*IsText=*/false, 73 /*IsAllocatable=*/true) 74 75 )) 76 : static_cast<MCSection *>(BC.Ctx->getMachOSection( 77 "__BOLT", "__counters", MachO::S_REGULAR, 78 SectionKind::getData())); 79 80 if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) { 81 errs() << "BOLT-ERROR: instrumentation of static binary currently does not " 82 "support profile output on binary finalization, so it " 83 "requires -instrumentation-sleep-time=N (N>0) usage\n"; 84 exit(1); 85 } 86 87 Section->setAlignment(llvm::Align(BC.RegularPageSize)); 88 Streamer.SwitchSection(Section); 89 90 // EmitOffset is used to determine padding size for data alignment 91 uint64_t EmitOffset = 0; 92 93 auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) { 94 Streamer.emitLabel(Symbol); 95 if (IsGlobal) 96 Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global); 97 }; 98 99 auto emitLabelByName = [&BC, emitLabel](StringRef Name, 100 bool IsGlobal = true) { 101 MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name); 102 emitLabel(Symbol, IsGlobal); 103 }; 104 105 auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) { 106 const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset; 107 if (Padding) { 108 Streamer.emitFill(Padding, 0); 109 EmitOffset += Padding; 110 } 111 }; 112 113 auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; }; 114 115 auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) { 116 emitPadding(Size); 117 emitDataSize(Size); 118 }; 119 120 auto emitFill = [&Streamer, emitDataSize, 121 emitLabel](unsigned Size, MCSymbol *Symbol = nullptr, 122 uint8_t Byte = 0) { 123 emitDataSize(Size); 124 if (Symbol) 125 emitLabel(Symbol, /*IsGlobal*/ false); 126 Streamer.emitFill(Size, Byte); 127 }; 128 129 auto emitValue = [&BC, &Streamer, emitDataPadding, 130 emitLabel](MCSymbol *Symbol, const MCExpr *Value) { 131 const unsigned Psize = BC.AsmInfo->getCodePointerSize(); 132 emitDataPadding(Psize); 133 emitLabel(Symbol); 134 if (Value) 135 Streamer.emitValue(Value, Psize); 136 else 137 Streamer.emitFill(Psize, 0); 138 }; 139 140 auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName]( 141 StringRef Name, uint64_t Value, unsigned Size = 4) { 142 emitDataPadding(Size); 143 emitLabelByName(Name); 144 Streamer.emitIntValue(Value, Size); 145 }; 146 147 auto emitString = [&Streamer, emitDataSize, emitLabelByName, 148 emitFill](StringRef Name, StringRef Contents) { 149 emitDataSize(Contents.size()); 150 emitLabelByName(Name); 151 Streamer.emitBytes(Contents); 152 emitFill(1); 153 }; 154 155 // All of the following symbols will be exported as globals to be used by the 156 // instrumentation runtime library to dump the instrumentation data to disk. 157 // Label marking start of the memory region containing instrumentation 158 // counters, total vector size is Counters.size() 8-byte counters 159 emitLabelByName("__bolt_instr_locations"); 160 for (MCSymbol *const &Label : Summary->Counters) 161 emitFill(sizeof(uint64_t), Label); 162 163 emitPadding(BC.RegularPageSize); 164 emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime); 165 emitIntValue("__bolt_instr_no_counters_clear", 166 !!opts::InstrumentationNoCountersClear, 1); 167 emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation, 168 1); 169 emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1); 170 emitIntValue("__bolt_num_counters", Summary->Counters.size()); 171 emitValue(Summary->IndCallCounterFuncPtr, nullptr); 172 emitValue(Summary->IndTailCallCounterFuncPtr, nullptr); 173 emitIntValue("__bolt_instr_num_ind_calls", 174 Summary->IndCallDescriptions.size()); 175 emitIntValue("__bolt_instr_num_ind_targets", 176 Summary->IndCallTargetDescriptions.size()); 177 emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size()); 178 emitString("__bolt_instr_filename", opts::InstrumentationFilename); 179 emitString("__bolt_instr_binpath", opts::InstrumentationBinpath); 180 emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1); 181 182 if (BC.isMachO()) { 183 MCSection *TablesSection = BC.Ctx->getMachOSection( 184 "__BOLT", "__tables", MachO::S_REGULAR, 185 SectionKind::getData()); 186 TablesSection->setAlignment(llvm::Align(BC.RegularPageSize)); 187 Streamer.SwitchSection(TablesSection); 188 emitString("__bolt_instr_tables", buildTables(BC)); 189 } 190 } 191 192 void InstrumentationRuntimeLibrary::link( 193 BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld, 194 std::function<void(RuntimeDyld &)> OnLoad) { 195 std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib); 196 loadLibrary(LibPath, RTDyld); 197 OnLoad(RTDyld); 198 RTDyld.finalizeWithMemoryManagerLocking(); 199 if (RTDyld.hasError()) { 200 outs() << "BOLT-ERROR: RTDyld failed: " << RTDyld.getErrorString() << "\n"; 201 exit(1); 202 } 203 204 if (BC.isMachO()) 205 return; 206 207 RuntimeFiniAddress = RTDyld.getSymbol("__bolt_instr_fini").getAddress(); 208 if (!RuntimeFiniAddress) { 209 errs() << "BOLT-ERROR: instrumentation library does not define " 210 "__bolt_instr_fini: " 211 << LibPath << "\n"; 212 exit(1); 213 } 214 RuntimeStartAddress = RTDyld.getSymbol("__bolt_instr_start").getAddress(); 215 if (!RuntimeStartAddress) { 216 errs() << "BOLT-ERROR: instrumentation library does not define " 217 "__bolt_instr_start: " 218 << LibPath << "\n"; 219 exit(1); 220 } 221 outs() << "BOLT-INFO: output linked against instrumentation runtime " 222 "library, lib entry point is 0x" 223 << Twine::utohexstr(RuntimeFiniAddress) << "\n"; 224 outs() << "BOLT-INFO: clear procedure is 0x" 225 << Twine::utohexstr( 226 RTDyld.getSymbol("__bolt_instr_clear_counters").getAddress()) 227 << "\n"; 228 229 emitTablesAsELFNote(BC); 230 } 231 232 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) { 233 std::string TablesStr; 234 raw_string_ostream OS(TablesStr); 235 236 // This is sync'ed with runtime/instr.cpp:readDescriptions() 237 auto getOutputAddress = [](const BinaryFunction &Func, 238 uint64_t Offset) -> uint64_t { 239 return Offset == 0 240 ? Func.getOutputAddress() 241 : Func.translateInputToOutputAddress(Func.getAddress() + Offset); 242 }; 243 244 // Indirect targets need to be sorted for fast lookup during runtime 245 std::sort(Summary->IndCallTargetDescriptions.begin(), 246 Summary->IndCallTargetDescriptions.end(), 247 [&](const IndCallTargetDescription &A, 248 const IndCallTargetDescription &B) { 249 return getOutputAddress(*A.Target, A.ToLoc.Offset) < 250 getOutputAddress(*B.Target, B.ToLoc.Offset); 251 }); 252 253 // Start of the vector with descriptions (one CounterDescription for each 254 // counter), vector size is Counters.size() CounterDescription-sized elmts 255 const size_t IDSize = 256 Summary->IndCallDescriptions.size() * sizeof(IndCallDescription); 257 OS.write(reinterpret_cast<const char *>(&IDSize), 4); 258 for (const IndCallDescription &Desc : Summary->IndCallDescriptions) { 259 OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4); 260 OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4); 261 } 262 263 const size_t ITDSize = Summary->IndCallTargetDescriptions.size() * 264 sizeof(IndCallTargetDescription); 265 OS.write(reinterpret_cast<const char *>(&ITDSize), 4); 266 for (const IndCallTargetDescription &Desc : 267 Summary->IndCallTargetDescriptions) { 268 OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4); 269 OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4); 270 uint64_t TargetFuncAddress = 271 getOutputAddress(*Desc.Target, Desc.ToLoc.Offset); 272 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8); 273 } 274 275 uint32_t FuncDescSize = Summary->getFDSize(); 276 OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4); 277 for (const FunctionDescription &Desc : Summary->FunctionDescriptions) { 278 const size_t LeafNum = Desc.LeafNodes.size(); 279 OS.write(reinterpret_cast<const char *>(&LeafNum), 4); 280 for (const InstrumentedNode &LeafNode : Desc.LeafNodes) { 281 OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4); 282 OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4); 283 } 284 const size_t EdgesNum = Desc.Edges.size(); 285 OS.write(reinterpret_cast<const char *>(&EdgesNum), 4); 286 for (const EdgeDescription &Edge : Desc.Edges) { 287 OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4); 288 OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4); 289 OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4); 290 OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4); 291 OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4); 292 OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4); 293 OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4); 294 } 295 const size_t CallsNum = Desc.Calls.size(); 296 OS.write(reinterpret_cast<const char *>(&CallsNum), 4); 297 for (const CallDescription &Call : Desc.Calls) { 298 OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4); 299 OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4); 300 OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4); 301 OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4); 302 OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4); 303 OS.write(reinterpret_cast<const char *>(&Call.Counter), 4); 304 uint64_t TargetFuncAddress = 305 getOutputAddress(*Call.Target, Call.ToLoc.Offset); 306 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8); 307 } 308 const size_t EntryNum = Desc.EntryNodes.size(); 309 OS.write(reinterpret_cast<const char *>(&EntryNum), 4); 310 for (const EntryNode &EntryNode : Desc.EntryNodes) { 311 OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8); 312 uint64_t TargetFuncAddress = 313 getOutputAddress(*Desc.Function, EntryNode.Address); 314 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8); 315 } 316 } 317 // Our string table lives immediately after descriptions vector 318 OS << Summary->StringTable; 319 OS.flush(); 320 321 return TablesStr; 322 } 323 324 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) { 325 std::string TablesStr = buildTables(BC); 326 const std::string BoltInfo = BinarySection::encodeELFNote( 327 "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES); 328 BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo), 329 BoltInfo.size(), 330 /*Alignment=*/1, 331 /*IsReadOnly=*/true, ELF::SHT_NOTE); 332 } 333