1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Register objects for access by profilers via the perf JIT interface. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" 14 15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h" 16 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/Support/MemoryBuffer.h" 19 #include "llvm/Support/Path.h" 20 #include "llvm/Support/Process.h" 21 #include "llvm/Support/Threading.h" 22 23 #include <mutex> 24 #include <optional> 25 26 #ifdef __linux__ 27 28 #include <sys/mman.h> // mmap() 29 #include <time.h> // clock_gettime(), time(), localtime_r() */ 30 31 #define DEBUG_TYPE "orc" 32 33 // language identifier (XXX: should we generate something better from debug 34 // info?) 35 #define JIT_LANG "llvm-IR" 36 #define LLVM_PERF_JIT_MAGIC \ 37 ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ 38 (uint32_t)'D') 39 #define LLVM_PERF_JIT_VERSION 1 40 41 using namespace llvm; 42 using namespace llvm::orc; 43 44 struct PerfState { 45 // cache lookups 46 uint32_t Pid; 47 48 // base directory for output data 49 std::string JitPath; 50 51 // output data stream, closed via Dumpstream 52 int DumpFd = -1; 53 54 // output data stream 55 std::unique_ptr<raw_fd_ostream> Dumpstream; 56 57 // perf mmap marker 58 void *MarkerAddr = NULL; 59 }; 60 61 // prevent concurrent dumps from messing up the output file 62 static std::mutex Mutex; 63 static std::optional<PerfState> State; 64 65 struct RecHeader { 66 uint32_t Id; 67 uint32_t TotalSize; 68 uint64_t Timestamp; 69 }; 70 71 struct DIR { 72 RecHeader Prefix; 73 uint64_t CodeAddr; 74 uint64_t NrEntry; 75 }; 76 77 struct DIE { 78 uint64_t CodeAddr; 79 uint32_t Line; 80 uint32_t Discrim; 81 }; 82 83 struct CLR { 84 RecHeader Prefix; 85 uint32_t Pid; 86 uint32_t Tid; 87 uint64_t Vma; 88 uint64_t CodeAddr; 89 uint64_t CodeSize; 90 uint64_t CodeIndex; 91 }; 92 93 struct UWR { 94 RecHeader Prefix; 95 uint64_t UnwindDataSize; 96 uint64_t EhFrameHeaderSize; 97 uint64_t MappedSize; 98 }; 99 100 static inline uint64_t timespec_to_ns(const struct timespec *TS) { 101 const uint64_t NanoSecPerSec = 1000000000; 102 return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec; 103 } 104 105 static inline uint64_t perf_get_timestamp() { 106 timespec TS; 107 if (clock_gettime(CLOCK_MONOTONIC, &TS)) 108 return 0; 109 110 return timespec_to_ns(&TS); 111 } 112 113 static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { 114 assert(State && "PerfState not initialized"); 115 LLVM_DEBUG(dbgs() << "Writing debug record with " 116 << DebugRecord.Entries.size() << " entries\n"); 117 [[maybe_unused]] size_t Written = 0; 118 DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id), 119 DebugRecord.Prefix.TotalSize, perf_get_timestamp()}, 120 DebugRecord.CodeAddr, DebugRecord.Entries.size()}; 121 State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir)); 122 Written += sizeof(Dir); 123 for (auto &Die : DebugRecord.Entries) { 124 DIE d{Die.Addr, Die.Lineno, Die.Discrim}; 125 State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d)); 126 State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1); 127 Written += sizeof(d) + Die.Name.size() + 1; 128 } 129 LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n"); 130 } 131 132 static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { 133 assert(State && "PerfState not initialized"); 134 uint32_t Tid = get_threadid(); 135 LLVM_DEBUG(dbgs() << "Writing code record with code size " 136 << CodeRecord.CodeSize << " and code index " 137 << CodeRecord.CodeIndex << "\n"); 138 CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id), 139 CodeRecord.Prefix.TotalSize, perf_get_timestamp()}, 140 State->Pid, 141 Tid, 142 CodeRecord.Vma, 143 CodeRecord.CodeAddr, 144 CodeRecord.CodeSize, 145 CodeRecord.CodeIndex}; 146 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, " 147 << CodeRecord.Name.size() + 1 << " bytes of name, " 148 << CodeRecord.CodeSize << " bytes of code\n"); 149 State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr)); 150 State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1); 151 State->Dumpstream->write((const char *)CodeRecord.CodeAddr, 152 CodeRecord.CodeSize); 153 } 154 155 static void 156 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { 157 assert(State && "PerfState not initialized"); 158 dbgs() << "Writing unwind record with unwind data size " 159 << UnwindRecord.UnwindDataSize << " and EH frame header size " 160 << UnwindRecord.EHFrameHdrSize << " and mapped size " 161 << UnwindRecord.MappedSize << "\n"; 162 UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id), 163 UnwindRecord.Prefix.TotalSize, perf_get_timestamp()}, 164 UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize, 165 UnwindRecord.MappedSize}; 166 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, " 167 << UnwindRecord.EHFrameHdrSize 168 << " bytes of EH frame header, " 169 << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize 170 << " bytes of EH frame\n"); 171 State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr)); 172 if (UnwindRecord.EHFrameHdrAddr) 173 State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr, 174 UnwindRecord.EHFrameHdrSize); 175 else 176 State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(), 177 UnwindRecord.EHFrameHdrSize); 178 State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr, 179 UnwindRecord.UnwindDataSize - 180 UnwindRecord.EHFrameHdrSize); 181 } 182 183 static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) { 184 if (!State) 185 return make_error<StringError>("PerfState not initialized", 186 inconvertibleErrorCode()); 187 188 // Serialize the batch 189 std::lock_guard<std::mutex> Lock(Mutex); 190 if (Batch.UnwindingRecord.Prefix.TotalSize > 0) 191 writeUnwindRecord(Batch.UnwindingRecord); 192 193 for (const auto &DebugInfo : Batch.DebugInfoRecords) 194 writeDebugRecord(DebugInfo); 195 196 for (const auto &CodeLoad : Batch.CodeLoadRecords) 197 writeCodeRecord(CodeLoad); 198 199 State->Dumpstream->flush(); 200 201 return Error::success(); 202 } 203 204 struct Header { 205 uint32_t Magic; // characters "JiTD" 206 uint32_t Version; // header version 207 uint32_t TotalSize; // total size of header 208 uint32_t ElfMach; // elf mach target 209 uint32_t Pad1; // reserved 210 uint32_t Pid; 211 uint64_t Timestamp; // timestamp 212 uint64_t Flags; // flags 213 }; 214 215 static Error OpenMarker(PerfState &State) { 216 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap 217 // is captured either live (perf record running when we mmap) or in deferred 218 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump 219 // file for more meta data info about the jitted code. Perf report/annotate 220 // detect this special filename and process the jitdump file. 221 // 222 // Mapping must be PROT_EXEC to ensure it is captured by perf record 223 // even when not using -d option. 224 State.MarkerAddr = 225 ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC, 226 MAP_PRIVATE, State.DumpFd, 0); 227 228 if (State.MarkerAddr == MAP_FAILED) 229 return make_error<llvm::StringError>("could not mmap JIT marker", 230 inconvertibleErrorCode()); 231 232 return Error::success(); 233 } 234 235 void CloseMarker(PerfState &State) { 236 if (!State.MarkerAddr) 237 return; 238 239 munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate()); 240 State.MarkerAddr = nullptr; 241 } 242 243 static Expected<Header> FillMachine(PerfState &State) { 244 Header Hdr; 245 Hdr.Magic = LLVM_PERF_JIT_MAGIC; 246 Hdr.Version = LLVM_PERF_JIT_VERSION; 247 Hdr.TotalSize = sizeof(Hdr); 248 Hdr.Pid = State.Pid; 249 Hdr.Timestamp = perf_get_timestamp(); 250 251 char Id[16]; 252 struct { 253 uint16_t e_type; 254 uint16_t e_machine; 255 } Info; 256 257 size_t RequiredMemory = sizeof(Id) + sizeof(Info); 258 259 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 260 MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0); 261 262 // This'll not guarantee that enough data was actually read from the 263 // underlying file. Instead the trailing part of the buffer would be 264 // zeroed. Given the ELF signature check below that seems ok though, 265 // it's unlikely that the file ends just after that, and the 266 // consequence would just be that perf wouldn't recognize the 267 // signature. 268 if (!MB) 269 return make_error<llvm::StringError>("could not open /proc/self/exe", 270 MB.getError()); 271 272 memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id)); 273 memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info)); 274 275 // check ELF signature 276 if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F') 277 return make_error<llvm::StringError>("invalid ELF signature", 278 inconvertibleErrorCode()); 279 280 Hdr.ElfMach = Info.e_machine; 281 282 return Hdr; 283 } 284 285 static Error InitDebuggingDir(PerfState &State) { 286 time_t Time; 287 struct tm LocalTime; 288 char TimeBuffer[sizeof("YYYYMMDD")]; 289 SmallString<64> Path; 290 291 // search for location to dump data to 292 if (const char *BaseDir = getenv("JITDUMPDIR")) 293 Path.append(BaseDir); 294 else if (!sys::path::home_directory(Path)) 295 Path = "."; 296 297 // create debug directory 298 Path += "/.debug/jit/"; 299 if (auto EC = sys::fs::create_directories(Path)) { 300 std::string ErrStr; 301 raw_string_ostream ErrStream(ErrStr); 302 ErrStream << "could not create jit cache directory " << Path << ": " 303 << EC.message() << "\n"; 304 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode()); 305 } 306 307 // create unique directory for dump data related to this process 308 time(&Time); 309 localtime_r(&Time, &LocalTime); 310 strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); 311 Path += JIT_LANG "-jit-"; 312 Path += TimeBuffer; 313 314 SmallString<128> UniqueDebugDir; 315 316 using sys::fs::createUniqueDirectory; 317 if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { 318 std::string ErrStr; 319 raw_string_ostream ErrStream(ErrStr); 320 ErrStream << "could not create unique jit cache directory " 321 << UniqueDebugDir << ": " << EC.message() << "\n"; 322 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode()); 323 } 324 325 State.JitPath = std::string(UniqueDebugDir); 326 327 return Error::success(); 328 } 329 330 static Error registerJITLoaderPerfStartImpl() { 331 PerfState Tentative; 332 Tentative.Pid = sys::Process::getProcessId(); 333 // check if clock-source is supported 334 if (!perf_get_timestamp()) 335 return make_error<StringError>("kernel does not support CLOCK_MONOTONIC", 336 inconvertibleErrorCode()); 337 338 if (auto Err = InitDebuggingDir(Tentative)) 339 return Err; 340 341 std::string Filename; 342 raw_string_ostream FilenameBuf(Filename); 343 FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump"; 344 345 // Need to open ourselves, because we need to hand the FD to OpenMarker() and 346 // raw_fd_ostream doesn't expose the FD. 347 using sys::fs::openFileForWrite; 348 if (auto EC = openFileForReadWrite(Filename, Tentative.DumpFd, 349 sys::fs::CD_CreateNew, sys::fs::OF_None)) { 350 std::string ErrStr; 351 raw_string_ostream ErrStream(ErrStr); 352 ErrStream << "could not open JIT dump file " << Filename << ": " 353 << EC.message() << "\n"; 354 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode()); 355 } 356 357 Tentative.Dumpstream = 358 std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true); 359 360 auto Header = FillMachine(Tentative); 361 if (!Header) 362 return Header.takeError(); 363 364 // signal this process emits JIT information 365 if (auto Err = OpenMarker(Tentative)) 366 return Err; 367 368 Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()), 369 sizeof(*Header)); 370 371 // Everything initialized, can do profiling now. 372 if (Tentative.Dumpstream->has_error()) 373 return make_error<StringError>("could not write JIT dump header", 374 inconvertibleErrorCode()); 375 376 State = std::move(Tentative); 377 return Error::success(); 378 } 379 380 static Error registerJITLoaderPerfEndImpl() { 381 if (!State) 382 return make_error<StringError>("PerfState not initialized", 383 inconvertibleErrorCode()); 384 385 RecHeader Close; 386 Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE); 387 Close.TotalSize = sizeof(Close); 388 Close.Timestamp = perf_get_timestamp(); 389 State->Dumpstream->write(reinterpret_cast<const char *>(&Close), 390 sizeof(Close)); 391 if (State->MarkerAddr) 392 CloseMarker(*State); 393 394 State.reset(); 395 return Error::success(); 396 } 397 398 extern "C" llvm::orc::shared::CWrapperFunctionResult 399 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { 400 using namespace orc::shared; 401 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle( 402 Data, Size, registerJITLoaderPerfImpl) 403 .release(); 404 } 405 406 extern "C" llvm::orc::shared::CWrapperFunctionResult 407 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { 408 using namespace orc::shared; 409 return WrapperFunction<SPSError()>::handle(Data, Size, 410 registerJITLoaderPerfStartImpl) 411 .release(); 412 } 413 414 extern "C" llvm::orc::shared::CWrapperFunctionResult 415 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { 416 using namespace orc::shared; 417 return WrapperFunction<SPSError()>::handle(Data, Size, 418 registerJITLoaderPerfEndImpl) 419 .release(); 420 } 421 422 #else 423 424 using namespace llvm; 425 using namespace llvm::orc; 426 427 static Error badOS() { 428 using namespace llvm; 429 return llvm::make_error<StringError>( 430 "unsupported OS (perf support is only available on linux!)", 431 inconvertibleErrorCode()); 432 } 433 434 static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); } 435 436 extern "C" llvm::orc::shared::CWrapperFunctionResult 437 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { 438 using namespace shared; 439 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size, 440 badOSBatch) 441 .release(); 442 } 443 444 extern "C" llvm::orc::shared::CWrapperFunctionResult 445 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { 446 using namespace shared; 447 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); 448 } 449 450 extern "C" llvm::orc::shared::CWrapperFunctionResult 451 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { 452 using namespace shared; 453 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); 454 } 455 456 #endif 457