1 //===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a JITEventListener object that tells perf about JITted 10 // functions, including source line information. 11 // 12 // Documentation for perf jit integration is available at: 13 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt 14 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/Config/config.h" 20 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 21 #include "llvm/ExecutionEngine/JITEventListener.h" 22 #include "llvm/Object/ObjectFile.h" 23 #include "llvm/Object/SymbolSize.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/Errno.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Mutex.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/Process.h" 31 #include "llvm/Support/Threading.h" 32 #include "llvm/Support/raw_ostream.h" 33 #include <mutex> 34 35 #include <sys/mman.h> // mmap() 36 #include <time.h> // clock_gettime(), time(), localtime_r() */ 37 #include <unistd.h> // for read(), close() 38 39 using namespace llvm; 40 using namespace llvm::object; 41 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; 42 43 namespace { 44 45 // language identifier (XXX: should we generate something better from debug 46 // info?) 47 #define JIT_LANG "llvm-IR" 48 #define LLVM_PERF_JIT_MAGIC \ 49 ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ 50 (uint32_t)'D') 51 #define LLVM_PERF_JIT_VERSION 1 52 53 // bit 0: set if the jitdump file is using an architecture-specific timestamp 54 // clock source 55 #define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) 56 57 struct LLVMPerfJitHeader; 58 59 class PerfJITEventListener : public JITEventListener { 60 public: 61 PerfJITEventListener(); 62 ~PerfJITEventListener() { 63 // Lock a mutex to correctly synchronize with prior calls to 64 // `notifyObjectLoaded` and `notifyFreeingObject` that happened on other 65 // threads to prevent tsan from complaining. 66 std::lock_guard<sys::Mutex> Guard(Mutex); 67 if (MarkerAddr) 68 CloseMarker(); 69 } 70 71 void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj, 72 const RuntimeDyld::LoadedObjectInfo &L) override; 73 void notifyFreeingObject(ObjectKey K) override; 74 75 private: 76 bool InitDebuggingDir(); 77 bool OpenMarker(); 78 void CloseMarker(); 79 static bool FillMachine(LLVMPerfJitHeader &hdr); 80 81 void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr, 82 uint64_t CodeSize); 83 void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); 84 85 // cache lookups 86 sys::Process::Pid Pid; 87 88 // base directory for output data 89 std::string JitPath; 90 91 // output data stream, closed via Dumpstream 92 int DumpFd = -1; 93 94 // output data stream 95 std::unique_ptr<raw_fd_ostream> Dumpstream; 96 97 // prevent concurrent dumps from messing up the output file 98 sys::Mutex Mutex; 99 100 // perf mmap marker 101 void *MarkerAddr = NULL; 102 103 // perf support ready 104 bool SuccessfullyInitialized = false; 105 106 // identifier for functions, primarily to identify when moving them around 107 uint64_t CodeGeneration = 1; 108 }; 109 110 // The following are POD struct definitions from the perf jit specification 111 112 enum LLVMPerfJitRecordType { 113 JIT_CODE_LOAD = 0, 114 JIT_CODE_MOVE = 1, // not emitted, code isn't moved 115 JIT_CODE_DEBUG_INFO = 2, 116 JIT_CODE_CLOSE = 3, // not emitted, unnecessary 117 JIT_CODE_UNWINDING_INFO = 4, // not emitted 118 119 JIT_CODE_MAX 120 }; 121 122 struct LLVMPerfJitHeader { 123 uint32_t Magic; // characters "JiTD" 124 uint32_t Version; // header version 125 uint32_t TotalSize; // total size of header 126 uint32_t ElfMach; // elf mach target 127 uint32_t Pad1; // reserved 128 uint32_t Pid; 129 uint64_t Timestamp; // timestamp 130 uint64_t Flags; // flags 131 }; 132 133 // record prefix (mandatory in each record) 134 struct LLVMPerfJitRecordPrefix { 135 uint32_t Id; // record type identifier 136 uint32_t TotalSize; 137 uint64_t Timestamp; 138 }; 139 140 struct LLVMPerfJitRecordCodeLoad { 141 LLVMPerfJitRecordPrefix Prefix; 142 143 uint32_t Pid; 144 uint32_t Tid; 145 uint64_t Vma; 146 uint64_t CodeAddr; 147 uint64_t CodeSize; 148 uint64_t CodeIndex; 149 }; 150 151 struct LLVMPerfJitDebugEntry { 152 uint64_t Addr; 153 int Lineno; // source line number starting at 1 154 int Discrim; // column discriminator, 0 is default 155 // followed by null terminated filename, \xff\0 if same as previous entry 156 }; 157 158 struct LLVMPerfJitRecordDebugInfo { 159 LLVMPerfJitRecordPrefix Prefix; 160 161 uint64_t CodeAddr; 162 uint64_t NrEntry; 163 // followed by NrEntry LLVMPerfJitDebugEntry records 164 }; 165 166 static inline uint64_t timespec_to_ns(const struct timespec *ts) { 167 const uint64_t NanoSecPerSec = 1000000000; 168 return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; 169 } 170 171 static inline uint64_t perf_get_timestamp(void) { 172 struct timespec ts; 173 int ret; 174 175 ret = clock_gettime(CLOCK_MONOTONIC, &ts); 176 if (ret) 177 return 0; 178 179 return timespec_to_ns(&ts); 180 } 181 182 PerfJITEventListener::PerfJITEventListener() 183 : Pid(sys::Process::getProcessId()) { 184 // check if clock-source is supported 185 if (!perf_get_timestamp()) { 186 errs() << "kernel does not support CLOCK_MONOTONIC\n"; 187 return; 188 } 189 190 if (!InitDebuggingDir()) { 191 errs() << "could not initialize debugging directory\n"; 192 return; 193 } 194 195 std::string Filename; 196 raw_string_ostream FilenameBuf(Filename); 197 FilenameBuf << JitPath << "/jit-" << Pid << ".dump"; 198 199 // Need to open ourselves, because we need to hand the FD to OpenMarker() and 200 // raw_fd_ostream doesn't expose the FD. 201 using sys::fs::openFileForWrite; 202 if (auto EC = openFileForReadWrite(Filename, DumpFd, sys::fs::CD_CreateNew, 203 sys::fs::OF_None)) { 204 errs() << "could not open JIT dump file " << Filename << ": " 205 << EC.message() << "\n"; 206 return; 207 } 208 209 Dumpstream = std::make_unique<raw_fd_ostream>(DumpFd, true); 210 211 LLVMPerfJitHeader Header = {0, 0, 0, 0, 0, 0, 0, 0}; 212 if (!FillMachine(Header)) 213 return; 214 215 // signal this process emits JIT information 216 if (!OpenMarker()) 217 return; 218 219 // emit dumpstream header 220 Header.Magic = LLVM_PERF_JIT_MAGIC; 221 Header.Version = LLVM_PERF_JIT_VERSION; 222 Header.TotalSize = sizeof(Header); 223 Header.Pid = Pid; 224 Header.Timestamp = perf_get_timestamp(); 225 Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header)); 226 227 // Everything initialized, can do profiling now. 228 if (!Dumpstream->has_error()) 229 SuccessfullyInitialized = true; 230 } 231 232 void PerfJITEventListener::notifyObjectLoaded( 233 ObjectKey K, const ObjectFile &Obj, 234 const RuntimeDyld::LoadedObjectInfo &L) { 235 236 if (!SuccessfullyInitialized) 237 return; 238 239 OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj); 240 const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); 241 242 // Get the address of the object image for use as a unique identifier 243 std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj); 244 245 // Use symbol info to iterate over functions in the object. 246 for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) { 247 SymbolRef Sym = P.first; 248 std::string SourceFileName; 249 250 Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType(); 251 if (!SymTypeOrErr) { 252 // There's not much we can with errors here 253 consumeError(SymTypeOrErr.takeError()); 254 continue; 255 } 256 SymbolRef::Type SymType = *SymTypeOrErr; 257 if (SymType != SymbolRef::ST_Function) 258 continue; 259 260 Expected<StringRef> Name = Sym.getName(); 261 if (!Name) { 262 consumeError(Name.takeError()); 263 continue; 264 } 265 266 Expected<uint64_t> AddrOrErr = Sym.getAddress(); 267 if (!AddrOrErr) { 268 consumeError(AddrOrErr.takeError()); 269 continue; 270 } 271 uint64_t Size = P.second; 272 object::SectionedAddress Address; 273 Address.Address = *AddrOrErr; 274 275 uint64_t SectionIndex = object::SectionedAddress::UndefSection; 276 if (auto SectOrErr = Sym.getSection()) 277 if (*SectOrErr != Obj.section_end()) 278 SectionIndex = SectOrErr.get()->getIndex(); 279 280 // According to spec debugging info has to come before loading the 281 // corresponding code load. 282 DILineInfoTable Lines = Context->getLineInfoForAddressRange( 283 {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath); 284 285 NotifyDebug(*AddrOrErr, Lines); 286 NotifyCode(Name, *AddrOrErr, Size); 287 } 288 289 // avoid races with writes 290 std::lock_guard<sys::Mutex> Guard(Mutex); 291 292 Dumpstream->flush(); 293 } 294 295 void PerfJITEventListener::notifyFreeingObject(ObjectKey K) { 296 // perf currently doesn't have an interface for unloading. But munmap()ing the 297 // code section does, so that's ok. 298 } 299 300 bool PerfJITEventListener::InitDebuggingDir() { 301 time_t Time; 302 struct tm LocalTime; 303 char TimeBuffer[sizeof("YYYYMMDD")]; 304 SmallString<64> Path; 305 306 // search for location to dump data to 307 if (const char *BaseDir = getenv("JITDUMPDIR")) 308 Path.append(BaseDir); 309 else if (!sys::path::home_directory(Path)) 310 Path = "."; 311 312 // create debug directory 313 Path += "/.debug/jit/"; 314 if (auto EC = sys::fs::create_directories(Path)) { 315 errs() << "could not create jit cache directory " << Path << ": " 316 << EC.message() << "\n"; 317 return false; 318 } 319 320 // create unique directory for dump data related to this process 321 time(&Time); 322 localtime_r(&Time, &LocalTime); 323 strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); 324 Path += JIT_LANG "-jit-"; 325 Path += TimeBuffer; 326 327 SmallString<128> UniqueDebugDir; 328 329 using sys::fs::createUniqueDirectory; 330 if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { 331 errs() << "could not create unique jit cache directory " << UniqueDebugDir 332 << ": " << EC.message() << "\n"; 333 return false; 334 } 335 336 JitPath = std::string(UniqueDebugDir.str()); 337 338 return true; 339 } 340 341 bool PerfJITEventListener::OpenMarker() { 342 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap 343 // is captured either live (perf record running when we mmap) or in deferred 344 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump 345 // file for more meta data info about the jitted code. Perf report/annotate 346 // detect this special filename and process the jitdump file. 347 // 348 // Mapping must be PROT_EXEC to ensure it is captured by perf record 349 // even when not using -d option. 350 MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(), 351 PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0); 352 353 if (MarkerAddr == MAP_FAILED) { 354 errs() << "could not mmap JIT marker\n"; 355 return false; 356 } 357 return true; 358 } 359 360 void PerfJITEventListener::CloseMarker() { 361 if (!MarkerAddr) 362 return; 363 364 munmap(MarkerAddr, sys::Process::getPageSizeEstimate()); 365 MarkerAddr = nullptr; 366 } 367 368 bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) { 369 char id[16]; 370 struct { 371 uint16_t e_type; 372 uint16_t e_machine; 373 } info; 374 375 size_t RequiredMemory = sizeof(id) + sizeof(info); 376 377 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 378 MemoryBuffer::getFileSlice("/proc/self/exe", 379 RequiredMemory, 380 0); 381 382 // This'll not guarantee that enough data was actually read from the 383 // underlying file. Instead the trailing part of the buffer would be 384 // zeroed. Given the ELF signature check below that seems ok though, 385 // it's unlikely that the file ends just after that, and the 386 // consequence would just be that perf wouldn't recognize the 387 // signature. 388 if (auto EC = MB.getError()) { 389 errs() << "could not open /proc/self/exe: " << EC.message() << "\n"; 390 return false; 391 } 392 393 memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); 394 memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); 395 396 // check ELF signature 397 if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { 398 errs() << "invalid elf signature\n"; 399 return false; 400 } 401 402 hdr.ElfMach = info.e_machine; 403 404 return true; 405 } 406 407 void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol, 408 uint64_t CodeAddr, uint64_t CodeSize) { 409 assert(SuccessfullyInitialized); 410 411 // 0 length functions can't have samples. 412 if (CodeSize == 0) 413 return; 414 415 LLVMPerfJitRecordCodeLoad rec; 416 rec.Prefix.Id = JIT_CODE_LOAD; 417 rec.Prefix.TotalSize = sizeof(rec) + // debug record itself 418 Symbol->size() + 1 + // symbol name 419 CodeSize; // and code 420 rec.Prefix.Timestamp = perf_get_timestamp(); 421 422 rec.CodeSize = CodeSize; 423 rec.Vma = CodeAddr; 424 rec.CodeAddr = CodeAddr; 425 rec.Pid = Pid; 426 rec.Tid = get_threadid(); 427 428 // avoid interspersing output 429 std::lock_guard<sys::Mutex> Guard(Mutex); 430 431 rec.CodeIndex = CodeGeneration++; // under lock! 432 433 Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec)); 434 Dumpstream->write(Symbol->data(), Symbol->size() + 1); 435 Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize); 436 } 437 438 void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, 439 DILineInfoTable Lines) { 440 assert(SuccessfullyInitialized); 441 442 // Didn't get useful debug info. 443 if (Lines.empty()) 444 return; 445 446 LLVMPerfJitRecordDebugInfo rec; 447 rec.Prefix.Id = JIT_CODE_DEBUG_INFO; 448 rec.Prefix.TotalSize = sizeof(rec); // will be increased further 449 rec.Prefix.Timestamp = perf_get_timestamp(); 450 rec.CodeAddr = CodeAddr; 451 rec.NrEntry = Lines.size(); 452 453 // compute total size of record (variable due to filenames) 454 DILineInfoTable::iterator Begin = Lines.begin(); 455 DILineInfoTable::iterator End = Lines.end(); 456 for (DILineInfoTable::iterator It = Begin; It != End; ++It) { 457 DILineInfo &line = It->second; 458 rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry); 459 rec.Prefix.TotalSize += line.FileName.size() + 1; 460 } 461 462 // The debug_entry describes the source line information. It is defined as 463 // follows in order: 464 // * uint64_t code_addr: address of function for which the debug information 465 // is generated 466 // * uint32_t line : source file line number (starting at 1) 467 // * uint32_t discrim : column discriminator, 0 is default 468 // * char name[n] : source file name in ASCII, including null termination 469 470 // avoid interspersing output 471 std::lock_guard<sys::Mutex> Guard(Mutex); 472 473 Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec)); 474 475 for (DILineInfoTable::iterator It = Begin; It != End; ++It) { 476 LLVMPerfJitDebugEntry LineInfo; 477 DILineInfo &Line = It->second; 478 479 LineInfo.Addr = It->first; 480 // The function re-created by perf is preceded by a elf 481 // header. Need to adjust for that, otherwise the results are 482 // wrong. 483 LineInfo.Addr += 0x40; 484 LineInfo.Lineno = Line.Line; 485 LineInfo.Discrim = Line.Discriminator; 486 487 Dumpstream->write(reinterpret_cast<const char *>(&LineInfo), 488 sizeof(LineInfo)); 489 Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1); 490 } 491 } 492 493 } // end anonymous namespace 494 495 namespace llvm { 496 JITEventListener *JITEventListener::createPerfJITEventListener() { 497 // There should be only a single event listener per process, otherwise perf 498 // gets confused. 499 static PerfJITEventListener PerfListener; 500 return &PerfListener; 501 } 502 503 } // namespace llvm 504 505 LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void) 506 { 507 return wrap(JITEventListener::createPerfJITEventListener()); 508 } 509