1 //===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the InstrumentationRuntimeLibrary class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 14 #include "bolt/Core/BinaryFunction.h" 15 #include "bolt/Core/JumpTable.h" 16 #include "bolt/Core/Linker.h" 17 #include "bolt/Utils/CommandLineOpts.h" 18 #include "llvm/MC/MCStreamer.h" 19 #include "llvm/Support/Alignment.h" 20 #include "llvm/Support/CommandLine.h" 21 22 using namespace llvm; 23 using namespace bolt; 24 25 namespace opts { 26 27 cl::opt<std::string> RuntimeInstrumentationLib( 28 "runtime-instrumentation-lib", 29 cl::desc("specify path of the runtime instrumentation library"), 30 cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory)); 31 32 extern cl::opt<bool> InstrumentationFileAppendPID; 33 extern cl::opt<bool> ConservativeInstrumentation; 34 extern cl::opt<std::string> InstrumentationFilename; 35 extern cl::opt<std::string> InstrumentationBinpath; 36 extern cl::opt<uint32_t> InstrumentationSleepTime; 37 extern cl::opt<bool> InstrumentationNoCountersClear; 38 extern cl::opt<bool> InstrumentationWaitForks; 39 extern cl::opt<JumpTableSupportLevel> JumpTables; 40 41 } // namespace opts 42 43 void InstrumentationRuntimeLibrary::adjustCommandLineOptions( 44 const BinaryContext &BC) const { 45 if (!BC.HasRelocations) { 46 errs() << "BOLT-ERROR: instrumentation runtime libraries require " 47 "relocations\n"; 48 exit(1); 49 } 50 if (opts::JumpTables != JTS_MOVE) { 51 opts::JumpTables = JTS_MOVE; 52 outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n"; 53 } 54 if (!BC.StartFunctionAddress) { 55 errs() << "BOLT-ERROR: instrumentation runtime libraries require a known " 56 "entry point of " 57 "the input binary\n"; 58 exit(1); 59 } 60 61 if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) { 62 errs() << "BOLT-ERROR: instrumentation of static binary currently does not " 63 "support profile output on binary finalization, so it " 64 "requires -instrumentation-sleep-time=N (N>0) usage\n"; 65 exit(1); 66 } 67 68 if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) && 69 opts::InstrumentationFileAppendPID) { 70 errs() 71 << "BOLT-ERROR: instrumentation-file-append-pid is not compatible with " 72 "instrumentation-sleep-time and instrumentation-wait-forks. If you " 73 "want a separate profile for each fork, it can only be dumped in " 74 "the end of process when instrumentation-file-append-pid is used.\n"; 75 exit(1); 76 } 77 } 78 79 void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, 80 MCStreamer &Streamer) { 81 MCSection *Section = BC.isELF() 82 ? static_cast<MCSection *>(BC.Ctx->getELFSection( 83 ".bolt.instr.counters", ELF::SHT_PROGBITS, 84 BinarySection::getFlags(/*IsReadOnly=*/false, 85 /*IsText=*/false, 86 /*IsAllocatable=*/true) 87 88 )) 89 : static_cast<MCSection *>(BC.Ctx->getMachOSection( 90 "__BOLT", "__counters", MachO::S_REGULAR, 91 SectionKind::getData())); 92 93 Section->setAlignment(llvm::Align(BC.RegularPageSize)); 94 Streamer.switchSection(Section); 95 96 // EmitOffset is used to determine padding size for data alignment 97 uint64_t EmitOffset = 0; 98 99 auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) { 100 Streamer.emitLabel(Symbol); 101 if (IsGlobal) 102 Streamer.emitSymbolAttribute(Symbol, MCSymbolAttr::MCSA_Global); 103 }; 104 105 auto emitLabelByName = [&BC, emitLabel](StringRef Name, 106 bool IsGlobal = true) { 107 MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name); 108 emitLabel(Symbol, IsGlobal); 109 }; 110 111 auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) { 112 const uint64_t Padding = alignTo(EmitOffset, Size) - EmitOffset; 113 if (Padding) { 114 Streamer.emitFill(Padding, 0); 115 EmitOffset += Padding; 116 } 117 }; 118 119 auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; }; 120 121 auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) { 122 emitPadding(Size); 123 emitDataSize(Size); 124 }; 125 126 auto emitFill = [&Streamer, emitDataSize, 127 emitLabel](unsigned Size, MCSymbol *Symbol = nullptr, 128 uint8_t Byte = 0) { 129 emitDataSize(Size); 130 if (Symbol) 131 emitLabel(Symbol, /*IsGlobal*/ false); 132 Streamer.emitFill(Size, Byte); 133 }; 134 135 auto emitValue = [&BC, &Streamer, emitDataPadding, 136 emitLabel](MCSymbol *Symbol, const MCExpr *Value) { 137 const unsigned Psize = BC.AsmInfo->getCodePointerSize(); 138 emitDataPadding(Psize); 139 emitLabel(Symbol); 140 if (Value) 141 Streamer.emitValue(Value, Psize); 142 else 143 Streamer.emitFill(Psize, 0); 144 }; 145 146 auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName]( 147 StringRef Name, uint64_t Value, unsigned Size = 4) { 148 emitDataPadding(Size); 149 emitLabelByName(Name); 150 Streamer.emitIntValue(Value, Size); 151 }; 152 153 auto emitString = [&Streamer, emitDataSize, emitLabelByName, 154 emitFill](StringRef Name, StringRef Contents) { 155 emitDataSize(Contents.size()); 156 emitLabelByName(Name); 157 Streamer.emitBytes(Contents); 158 emitFill(1); 159 }; 160 161 // All of the following symbols will be exported as globals to be used by the 162 // instrumentation runtime library to dump the instrumentation data to disk. 163 // Label marking start of the memory region containing instrumentation 164 // counters, total vector size is Counters.size() 8-byte counters 165 emitLabelByName("__bolt_instr_locations"); 166 for (MCSymbol *const &Label : Summary->Counters) 167 emitFill(sizeof(uint64_t), Label); 168 169 emitPadding(BC.RegularPageSize); 170 emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime); 171 emitIntValue("__bolt_instr_no_counters_clear", 172 !!opts::InstrumentationNoCountersClear, 1); 173 emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation, 174 1); 175 emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1); 176 emitIntValue("__bolt_num_counters", Summary->Counters.size()); 177 emitValue(Summary->IndCallCounterFuncPtr, nullptr); 178 emitValue(Summary->IndTailCallCounterFuncPtr, nullptr); 179 emitIntValue("__bolt_instr_num_ind_calls", 180 Summary->IndCallDescriptions.size()); 181 emitIntValue("__bolt_instr_num_ind_targets", 182 Summary->IndCallTargetDescriptions.size()); 183 emitIntValue("__bolt_instr_num_funcs", Summary->FunctionDescriptions.size()); 184 emitString("__bolt_instr_filename", opts::InstrumentationFilename); 185 emitString("__bolt_instr_binpath", opts::InstrumentationBinpath); 186 emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1); 187 188 if (BC.isMachO()) { 189 MCSection *TablesSection = BC.Ctx->getMachOSection( 190 "__BOLT", "__tables", MachO::S_REGULAR, SectionKind::getData()); 191 TablesSection->setAlignment(llvm::Align(BC.RegularPageSize)); 192 Streamer.switchSection(TablesSection); 193 emitString("__bolt_instr_tables", buildTables(BC)); 194 } 195 } 196 197 void InstrumentationRuntimeLibrary::link( 198 BinaryContext &BC, StringRef ToolPath, BOLTLinker &Linker, 199 BOLTLinker::SectionsMapper MapSections) { 200 std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib); 201 loadLibrary(LibPath, Linker, MapSections); 202 203 if (BC.isMachO()) 204 return; 205 206 RuntimeFiniAddress = Linker.lookupSymbol("__bolt_instr_fini").value_or(0); 207 if (!RuntimeFiniAddress) { 208 errs() << "BOLT-ERROR: instrumentation library does not define " 209 "__bolt_instr_fini: " 210 << LibPath << "\n"; 211 exit(1); 212 } 213 RuntimeStartAddress = Linker.lookupSymbol("__bolt_instr_start").value_or(0); 214 if (!RuntimeStartAddress) { 215 errs() << "BOLT-ERROR: instrumentation library does not define " 216 "__bolt_instr_start: " 217 << LibPath << "\n"; 218 exit(1); 219 } 220 outs() << "BOLT-INFO: output linked against instrumentation runtime " 221 "library, lib entry point is 0x" 222 << Twine::utohexstr(RuntimeFiniAddress) << "\n"; 223 outs() << "BOLT-INFO: clear procedure is 0x" 224 << Twine::utohexstr( 225 Linker.lookupSymbol("__bolt_instr_clear_counters").value_or(0)) 226 << "\n"; 227 228 emitTablesAsELFNote(BC); 229 } 230 231 std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) { 232 std::string TablesStr; 233 raw_string_ostream OS(TablesStr); 234 235 // This is sync'ed with runtime/instr.cpp:readDescriptions() 236 auto getOutputAddress = [](const BinaryFunction &Func, 237 uint64_t Offset) -> uint64_t { 238 return Offset == 0 239 ? Func.getOutputAddress() 240 : Func.translateInputToOutputAddress(Func.getAddress() + Offset); 241 }; 242 243 // Indirect targets need to be sorted for fast lookup during runtime 244 llvm::sort(Summary->IndCallTargetDescriptions, 245 [&](const IndCallTargetDescription &A, 246 const IndCallTargetDescription &B) { 247 return getOutputAddress(*A.Target, A.ToLoc.Offset) < 248 getOutputAddress(*B.Target, B.ToLoc.Offset); 249 }); 250 251 // Start of the vector with descriptions (one CounterDescription for each 252 // counter), vector size is Counters.size() CounterDescription-sized elmts 253 const size_t IDSize = 254 Summary->IndCallDescriptions.size() * sizeof(IndCallDescription); 255 OS.write(reinterpret_cast<const char *>(&IDSize), 4); 256 for (const IndCallDescription &Desc : Summary->IndCallDescriptions) { 257 OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), 4); 258 OS.write(reinterpret_cast<const char *>(&Desc.FromLoc.Offset), 4); 259 } 260 261 const size_t ITDSize = Summary->IndCallTargetDescriptions.size() * 262 sizeof(IndCallTargetDescription); 263 OS.write(reinterpret_cast<const char *>(&ITDSize), 4); 264 for (const IndCallTargetDescription &Desc : 265 Summary->IndCallTargetDescriptions) { 266 OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), 4); 267 OS.write(reinterpret_cast<const char *>(&Desc.ToLoc.Offset), 4); 268 uint64_t TargetFuncAddress = 269 getOutputAddress(*Desc.Target, Desc.ToLoc.Offset); 270 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8); 271 } 272 273 uint32_t FuncDescSize = Summary->getFDSize(); 274 OS.write(reinterpret_cast<const char *>(&FuncDescSize), 4); 275 for (const FunctionDescription &Desc : Summary->FunctionDescriptions) { 276 const size_t LeafNum = Desc.LeafNodes.size(); 277 OS.write(reinterpret_cast<const char *>(&LeafNum), 4); 278 for (const InstrumentedNode &LeafNode : Desc.LeafNodes) { 279 OS.write(reinterpret_cast<const char *>(&LeafNode.Node), 4); 280 OS.write(reinterpret_cast<const char *>(&LeafNode.Counter), 4); 281 } 282 const size_t EdgesNum = Desc.Edges.size(); 283 OS.write(reinterpret_cast<const char *>(&EdgesNum), 4); 284 for (const EdgeDescription &Edge : Desc.Edges) { 285 OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), 4); 286 OS.write(reinterpret_cast<const char *>(&Edge.FromLoc.Offset), 4); 287 OS.write(reinterpret_cast<const char *>(&Edge.FromNode), 4); 288 OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), 4); 289 OS.write(reinterpret_cast<const char *>(&Edge.ToLoc.Offset), 4); 290 OS.write(reinterpret_cast<const char *>(&Edge.ToNode), 4); 291 OS.write(reinterpret_cast<const char *>(&Edge.Counter), 4); 292 } 293 const size_t CallsNum = Desc.Calls.size(); 294 OS.write(reinterpret_cast<const char *>(&CallsNum), 4); 295 for (const CallDescription &Call : Desc.Calls) { 296 OS.write(reinterpret_cast<const char *>(&Call.FromLoc.FuncString), 4); 297 OS.write(reinterpret_cast<const char *>(&Call.FromLoc.Offset), 4); 298 OS.write(reinterpret_cast<const char *>(&Call.FromNode), 4); 299 OS.write(reinterpret_cast<const char *>(&Call.ToLoc.FuncString), 4); 300 OS.write(reinterpret_cast<const char *>(&Call.ToLoc.Offset), 4); 301 OS.write(reinterpret_cast<const char *>(&Call.Counter), 4); 302 uint64_t TargetFuncAddress = 303 getOutputAddress(*Call.Target, Call.ToLoc.Offset); 304 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8); 305 } 306 const size_t EntryNum = Desc.EntryNodes.size(); 307 OS.write(reinterpret_cast<const char *>(&EntryNum), 4); 308 for (const EntryNode &EntryNode : Desc.EntryNodes) { 309 OS.write(reinterpret_cast<const char *>(&EntryNode.Node), 8); 310 uint64_t TargetFuncAddress = 311 getOutputAddress(*Desc.Function, EntryNode.Address); 312 OS.write(reinterpret_cast<const char *>(&TargetFuncAddress), 8); 313 } 314 } 315 // Our string table lives immediately after descriptions vector 316 OS << Summary->StringTable; 317 318 return TablesStr; 319 } 320 321 void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) { 322 std::string TablesStr = buildTables(BC); 323 const std::string BoltInfo = BinarySection::encodeELFNote( 324 "BOLT", TablesStr, BinarySection::NT_BOLT_INSTRUMENTATION_TABLES); 325 BC.registerOrUpdateNoteSection(".bolt.instr.tables", copyByteArray(BoltInfo), 326 BoltInfo.size(), 327 /*Alignment=*/1, 328 /*IsReadOnly=*/true, ELF::SHT_NOTE); 329 } 330