1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Register objects for access by profilers via the perf JIT interface. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" 14 15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h" 16 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/Support/MemoryBuffer.h" 19 #include "llvm/Support/Path.h" 20 #include "llvm/Support/Process.h" 21 #include "llvm/Support/Threading.h" 22 23 #include <mutex> 24 #include <optional> 25 26 #ifdef __linux__ 27 28 #include <sys/mman.h> // mmap() 29 #include <time.h> // clock_gettime(), time(), localtime_r() */ 30 #include <unistd.h> // for read(), close() 31 32 #define DEBUG_TYPE "orc" 33 34 // language identifier (XXX: should we generate something better from debug 35 // info?) 36 #define JIT_LANG "llvm-IR" 37 #define LLVM_PERF_JIT_MAGIC \ 38 ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ 39 (uint32_t)'D') 40 #define LLVM_PERF_JIT_VERSION 1 41 42 using namespace llvm; 43 using namespace llvm::orc; 44 45 struct PerfState { 46 // cache lookups 47 uint32_t Pid; 48 49 // base directory for output data 50 std::string JitPath; 51 52 // output data stream, closed via Dumpstream 53 int DumpFd = -1; 54 55 // output data stream 56 std::unique_ptr<raw_fd_ostream> Dumpstream; 57 58 // perf mmap marker 59 void *MarkerAddr = NULL; 60 }; 61 62 // prevent concurrent dumps from messing up the output file 63 static std::mutex Mutex; 64 static std::optional<PerfState> state; 65 66 struct RecHeader { 67 uint32_t Id; 68 uint32_t TotalSize; 69 uint64_t Timestamp; 70 }; 71 72 struct DIR { 73 RecHeader Prefix; 74 uint64_t CodeAddr; 75 uint64_t NrEntry; 76 }; 77 78 struct DIE { 79 uint64_t CodeAddr; 80 uint32_t Line; 81 uint32_t Discrim; 82 }; 83 84 struct CLR { 85 RecHeader Prefix; 86 uint32_t Pid; 87 uint32_t Tid; 88 uint64_t Vma; 89 uint64_t CodeAddr; 90 uint64_t CodeSize; 91 uint64_t CodeIndex; 92 }; 93 94 struct UWR { 95 RecHeader Prefix; 96 uint64_t UnwindDataSize; 97 uint64_t EhFrameHeaderSize; 98 uint64_t MappedSize; 99 }; 100 101 static inline uint64_t timespec_to_ns(const struct timespec *ts) { 102 const uint64_t NanoSecPerSec = 1000000000; 103 return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; 104 } 105 106 static inline uint64_t perf_get_timestamp() { 107 struct timespec ts; 108 int ret; 109 110 ret = clock_gettime(CLOCK_MONOTONIC, &ts); 111 if (ret) 112 return 0; 113 114 return timespec_to_ns(&ts); 115 } 116 117 static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { 118 assert(state && "PerfState not initialized"); 119 LLVM_DEBUG(dbgs() << "Writing debug record with " 120 << DebugRecord.Entries.size() << " entries\n"); 121 size_t Written = 0; 122 DIR dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id), 123 DebugRecord.Prefix.TotalSize, perf_get_timestamp()}, 124 DebugRecord.CodeAddr, DebugRecord.Entries.size()}; 125 state->Dumpstream->write(reinterpret_cast<const char *>(&dir), sizeof(dir)); 126 Written += sizeof(dir); 127 for (auto &die : DebugRecord.Entries) { 128 DIE d{die.Addr, die.Lineno, die.Discrim}; 129 state->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d)); 130 state->Dumpstream->write(die.Name.data(), die.Name.size() + 1); 131 Written += sizeof(d) + die.Name.size() + 1; 132 } 133 LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n"); 134 } 135 136 static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { 137 assert(state && "PerfState not initialized"); 138 uint32_t Tid = get_threadid(); 139 LLVM_DEBUG(dbgs() << "Writing code record with code size " 140 << CodeRecord.CodeSize << " and code index " 141 << CodeRecord.CodeIndex << "\n"); 142 CLR clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id), 143 CodeRecord.Prefix.TotalSize, perf_get_timestamp()}, 144 state->Pid, 145 Tid, 146 CodeRecord.Vma, 147 CodeRecord.CodeAddr, 148 CodeRecord.CodeSize, 149 CodeRecord.CodeIndex}; 150 LLVM_DEBUG(dbgs() << "wrote " << sizeof(clr) << " bytes of CLR, " 151 << CodeRecord.Name.size() + 1 << " bytes of name, " 152 << CodeRecord.CodeSize << " bytes of code\n"); 153 state->Dumpstream->write(reinterpret_cast<const char *>(&clr), sizeof(clr)); 154 state->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1); 155 state->Dumpstream->write((const char *)CodeRecord.CodeAddr, 156 CodeRecord.CodeSize); 157 } 158 159 static void 160 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { 161 assert(state && "PerfState not initialized"); 162 dbgs() << "Writing unwind record with unwind data size " 163 << UnwindRecord.UnwindDataSize << " and EH frame header size " 164 << UnwindRecord.EHFrameHdrSize << " and mapped size " 165 << UnwindRecord.MappedSize << "\n"; 166 UWR uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id), 167 UnwindRecord.Prefix.TotalSize, perf_get_timestamp()}, 168 UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize, 169 UnwindRecord.MappedSize}; 170 LLVM_DEBUG(dbgs() << "wrote " << sizeof(uwr) << " bytes of UWR, " 171 << UnwindRecord.EHFrameHdrSize 172 << " bytes of EH frame header, " 173 << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize 174 << " bytes of EH frame\n"); 175 state->Dumpstream->write(reinterpret_cast<const char *>(&uwr), sizeof(uwr)); 176 if (UnwindRecord.EHFrameHdrAddr) { 177 state->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr, 178 UnwindRecord.EHFrameHdrSize); 179 } else { 180 state->Dumpstream->write(UnwindRecord.EHFrameHdr.data(), 181 UnwindRecord.EHFrameHdrSize); 182 } 183 state->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr, 184 UnwindRecord.UnwindDataSize - 185 UnwindRecord.EHFrameHdrSize); 186 } 187 188 static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) { 189 if (!state) { 190 return make_error<StringError>("PerfState not initialized", 191 inconvertibleErrorCode()); 192 } 193 194 // Serialize the batch 195 std::lock_guard<std::mutex> Lock(Mutex); 196 if (Batch.UnwindingRecord.Prefix.TotalSize > 0) { 197 writeUnwindRecord(Batch.UnwindingRecord); 198 } 199 for (const auto &DebugInfo : Batch.DebugInfoRecords) { 200 writeDebugRecord(DebugInfo); 201 } 202 for (const auto &CodeLoad : Batch.CodeLoadRecords) { 203 writeCodeRecord(CodeLoad); 204 } 205 206 state->Dumpstream->flush(); 207 208 return Error::success(); 209 } 210 211 struct Header { 212 uint32_t Magic; // characters "JiTD" 213 uint32_t Version; // header version 214 uint32_t TotalSize; // total size of header 215 uint32_t ElfMach; // elf mach target 216 uint32_t Pad1; // reserved 217 uint32_t Pid; 218 uint64_t Timestamp; // timestamp 219 uint64_t Flags; // flags 220 }; 221 222 static Error OpenMarker(PerfState &state) { 223 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap 224 // is captured either live (perf record running when we mmap) or in deferred 225 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump 226 // file for more meta data info about the jitted code. Perf report/annotate 227 // detect this special filename and process the jitdump file. 228 // 229 // Mapping must be PROT_EXEC to ensure it is captured by perf record 230 // even when not using -d option. 231 state.MarkerAddr = 232 ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC, 233 MAP_PRIVATE, state.DumpFd, 0); 234 235 if (state.MarkerAddr == MAP_FAILED) { 236 return make_error<llvm::StringError>("could not mmap JIT marker", 237 inconvertibleErrorCode()); 238 } 239 return Error::success(); 240 } 241 242 void CloseMarker(PerfState &state) { 243 if (!state.MarkerAddr) 244 return; 245 246 munmap(state.MarkerAddr, sys::Process::getPageSizeEstimate()); 247 state.MarkerAddr = nullptr; 248 } 249 250 static Expected<Header> FillMachine(PerfState &state) { 251 Header hdr; 252 hdr.Magic = LLVM_PERF_JIT_MAGIC; 253 hdr.Version = LLVM_PERF_JIT_VERSION; 254 hdr.TotalSize = sizeof(hdr); 255 hdr.Pid = state.Pid; 256 hdr.Timestamp = perf_get_timestamp(); 257 258 char id[16]; 259 struct { 260 uint16_t e_type; 261 uint16_t e_machine; 262 } info; 263 264 size_t RequiredMemory = sizeof(id) + sizeof(info); 265 266 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 267 MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0); 268 269 // This'll not guarantee that enough data was actually read from the 270 // underlying file. Instead the trailing part of the buffer would be 271 // zeroed. Given the ELF signature check below that seems ok though, 272 // it's unlikely that the file ends just after that, and the 273 // consequence would just be that perf wouldn't recognize the 274 // signature. 275 if (!MB) { 276 return make_error<llvm::StringError>("could not open /proc/self/exe", 277 MB.getError()); 278 } 279 280 memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); 281 memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); 282 283 // check ELF signature 284 if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { 285 return make_error<llvm::StringError>("invalid ELF signature", 286 inconvertibleErrorCode()); 287 } 288 289 hdr.ElfMach = info.e_machine; 290 291 return hdr; 292 } 293 294 static Error InitDebuggingDir(PerfState &state) { 295 time_t Time; 296 struct tm LocalTime; 297 char TimeBuffer[sizeof("YYYYMMDD")]; 298 SmallString<64> Path; 299 300 // search for location to dump data to 301 if (const char *BaseDir = getenv("JITDUMPDIR")) 302 Path.append(BaseDir); 303 else if (!sys::path::home_directory(Path)) 304 Path = "."; 305 306 // create debug directory 307 Path += "/.debug/jit/"; 308 if (auto EC = sys::fs::create_directories(Path)) { 309 std::string errstr; 310 raw_string_ostream errstream(errstr); 311 errstream << "could not create jit cache directory " << Path << ": " 312 << EC.message() << "\n"; 313 return make_error<StringError>(std::move(errstr), inconvertibleErrorCode()); 314 } 315 316 // create unique directory for dump data related to this process 317 time(&Time); 318 localtime_r(&Time, &LocalTime); 319 strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); 320 Path += JIT_LANG "-jit-"; 321 Path += TimeBuffer; 322 323 SmallString<128> UniqueDebugDir; 324 325 using sys::fs::createUniqueDirectory; 326 if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { 327 std::string errstr; 328 raw_string_ostream errstream(errstr); 329 errstream << "could not create unique jit cache directory " 330 << UniqueDebugDir << ": " << EC.message() << "\n"; 331 return make_error<StringError>(std::move(errstr), inconvertibleErrorCode()); 332 } 333 334 state.JitPath = std::string(UniqueDebugDir.str()); 335 336 return Error::success(); 337 } 338 339 static Error registerJITLoaderPerfStartImpl() { 340 PerfState tentative; 341 tentative.Pid = sys::Process::getProcessId(); 342 // check if clock-source is supported 343 if (!perf_get_timestamp()) { 344 return make_error<StringError>("kernel does not support CLOCK_MONOTONIC", 345 inconvertibleErrorCode()); 346 } 347 348 if (auto err = InitDebuggingDir(tentative)) { 349 return std::move(err); 350 } 351 352 std::string Filename; 353 raw_string_ostream FilenameBuf(Filename); 354 FilenameBuf << tentative.JitPath << "/jit-" << tentative.Pid << ".dump"; 355 356 // Need to open ourselves, because we need to hand the FD to OpenMarker() and 357 // raw_fd_ostream doesn't expose the FD. 358 using sys::fs::openFileForWrite; 359 if (auto EC = openFileForReadWrite(FilenameBuf.str(), tentative.DumpFd, 360 sys::fs::CD_CreateNew, sys::fs::OF_None)) { 361 std::string errstr; 362 raw_string_ostream errstream(errstr); 363 errstream << "could not open JIT dump file " << FilenameBuf.str() << ": " 364 << EC.message() << "\n"; 365 return make_error<StringError>(std::move(errstr), inconvertibleErrorCode()); 366 } 367 368 tentative.Dumpstream = 369 std::make_unique<raw_fd_ostream>(tentative.DumpFd, true); 370 371 auto header = FillMachine(tentative); 372 if (!header) { 373 return header.takeError(); 374 } 375 376 // signal this process emits JIT information 377 if (auto err = OpenMarker(tentative)) { 378 return std::move(err); 379 } 380 381 tentative.Dumpstream->write(reinterpret_cast<const char *>(&header.get()), 382 sizeof(*header)); 383 384 // Everything initialized, can do profiling now. 385 if (tentative.Dumpstream->has_error()) { 386 return make_error<StringError>("could not write JIT dump header", 387 inconvertibleErrorCode()); 388 } 389 state = std::move(tentative); 390 return Error::success(); 391 } 392 393 static Error registerJITLoaderPerfEndImpl() { 394 if (!state) { 395 return make_error<StringError>("PerfState not initialized", 396 inconvertibleErrorCode()); 397 } 398 RecHeader close; 399 close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE); 400 close.TotalSize = sizeof(close); 401 close.Timestamp = perf_get_timestamp(); 402 state->Dumpstream->write(reinterpret_cast<const char *>(&close), 403 sizeof(close)); 404 if (state->MarkerAddr) { 405 CloseMarker(*state); 406 } 407 state.reset(); 408 return Error::success(); 409 } 410 411 extern "C" llvm::orc::shared::CWrapperFunctionResult 412 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { 413 using namespace orc::shared; 414 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle( 415 Data, Size, registerJITLoaderPerfImpl) 416 .release(); 417 } 418 419 extern "C" llvm::orc::shared::CWrapperFunctionResult 420 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { 421 using namespace orc::shared; 422 return WrapperFunction<SPSError()>::handle(Data, Size, 423 registerJITLoaderPerfStartImpl) 424 .release(); 425 } 426 427 extern "C" llvm::orc::shared::CWrapperFunctionResult 428 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { 429 using namespace orc::shared; 430 return WrapperFunction<SPSError()>::handle(Data, Size, 431 registerJITLoaderPerfEndImpl) 432 .release(); 433 } 434 435 #else 436 437 static Error badOS() { 438 return make_error<StringError>( 439 "unsupported OS (perf support is only available on linux!)", 440 inconvertibleErrorCode()); 441 } 442 443 static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); } 444 445 extern "C" llvm::orc::shared::CWrapperFunctionResult 446 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { 447 using namespace orc::shared; 448 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size, 449 badOSBatch) 450 .release(); 451 } 452 453 extern "C" llvm::orc::shared::CWrapperFunctionResult 454 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { 455 using namespace orc::shared; 456 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); 457 } 458 459 extern "C" llvm::orc::shared::CWrapperFunctionResult 460 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { 461 using namespace orc::shared; 462 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); 463 } 464 465 #endif