xref: /llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp (revision 1f4d91ecb8529678a3d3919d7523743bd21942ca)
1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Register objects for access by profilers via the perf JIT interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
14 
15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
16 
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/Process.h"
21 #include "llvm/Support/Threading.h"
22 
23 #include <mutex>
24 #include <optional>
25 
26 #ifdef __linux__
27 
28 #include <sys/mman.h> // mmap()
#include <time.h>     // clock_gettime(), time(), localtime_r()
30 
31 #define DEBUG_TYPE "orc"
32 
// language identifier (XXX: should we generate something better from debug
// info?)
// Used as the leading part of the per-process dump directory name.
#define JIT_LANG "llvm-IR"
// Magic value stored in the dump file header: the characters 'J', 'i', 'T',
// 'D' packed into one 32-bit integer with 'J' in the most significant byte.
#define LLVM_PERF_JIT_MAGIC                                                    \
  ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
   (uint32_t)'D')
// Version value stored in the dump file header.
#define LLVM_PERF_JIT_VERSION 1
40 
41 using namespace llvm;
42 using namespace llvm::orc;
43 
44 struct PerfState {
45   // cache lookups
46   uint32_t Pid;
47 
48   // base directory for output data
49   std::string JitPath;
50 
51   // output data stream, closed via Dumpstream
52   int DumpFd = -1;
53 
54   // output data stream
55   std::unique_ptr<raw_fd_ostream> Dumpstream;
56 
57   // perf mmap marker
58   void *MarkerAddr = NULL;
59 };
60 
// prevent concurrent dumps from messing up the output file
static std::mutex Mutex;
// Engaged by registerJITLoaderPerfStartImpl() on success and reset again by
// registerJITLoaderPerfEndImpl(); the record writers require it to be engaged.
static std::optional<PerfState> State;
64 
// Common prefix of the records written to the dump stream. DIR, CLR and UWR
// embed it as their first member; the close record consists of it alone.
// The struct is written to the stream byte-for-byte.
struct RecHeader {
  uint32_t Id;        // record type
  uint32_t TotalSize; // size value stored with the record
  uint64_t Timestamp; // filled from perf_get_timestamp() when written
};
70 
// Fixed-size head of a debug info record. writeDebugRecord() writes it
// byte-for-byte and then appends one DIE plus a NUL-terminated name per entry.
struct DIR {
  RecHeader Prefix;
  uint64_t CodeAddr;
  uint64_t NrEntry; // number of entries that follow
};
76 
// Fixed-size part of a single debug info entry. writeDebugRecord() writes it
// byte-for-byte, directly followed by the entry's NUL-terminated name.
struct DIE {
  uint64_t CodeAddr;
  uint32_t Line;
  uint32_t Discrim;
};
82 
// Fixed-size head of a code load record. writeCodeRecord() writes it
// byte-for-byte, followed by the NUL-terminated name and then CodeSize bytes
// read from CodeAddr.
struct CLR {
  RecHeader Prefix;
  uint32_t Pid; // taken from the cached PerfState::Pid
  uint32_t Tid; // id of the thread that writes the record
  uint64_t Vma;
  uint64_t CodeAddr;
  uint64_t CodeSize;
  uint64_t CodeIndex;
};
92 
// Fixed-size head of an unwinding info record. writeUnwindRecord() writes it
// byte-for-byte, followed by EhFrameHeaderSize bytes of EH frame header and
// then UnwindDataSize - EhFrameHeaderSize bytes of EH frame.
struct UWR {
  RecHeader Prefix;
  uint64_t UnwindDataSize;
  uint64_t EhFrameHeaderSize;
  uint64_t MappedSize;
};
99 
/// Flatten a timespec into a single count of nanoseconds.
static inline uint64_t timespec_to_ns(const struct timespec *TS) {
  constexpr uint64_t NsPerSecond = 1000000000;
  const uint64_t WholeSeconds = static_cast<uint64_t>(TS->tv_sec);
  const uint64_t Remainder = static_cast<uint64_t>(TS->tv_nsec);
  return WholeSeconds * NsPerSecond + Remainder;
}
104 
105 static inline uint64_t perf_get_timestamp() {
106   timespec TS;
107   if (clock_gettime(CLOCK_MONOTONIC, &TS))
108     return 0;
109 
110   return timespec_to_ns(&TS);
111 }
112 
113 static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
114   assert(State && "PerfState not initialized");
115   LLVM_DEBUG(dbgs() << "Writing debug record with "
116                     << DebugRecord.Entries.size() << " entries\n");
117   [[maybe_unused]] size_t Written = 0;
118   DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
119                     DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
120           DebugRecord.CodeAddr, DebugRecord.Entries.size()};
121   State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));
122   Written += sizeof(Dir);
123   for (auto &Die : DebugRecord.Entries) {
124     DIE d{Die.Addr, Die.Lineno, Die.Discrim};
125     State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d));
126     State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1);
127     Written += sizeof(d) + Die.Name.size() + 1;
128   }
129   LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
130 }
131 
132 static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
133   assert(State && "PerfState not initialized");
134   uint32_t Tid = get_threadid();
135   LLVM_DEBUG(dbgs() << "Writing code record with code size "
136                     << CodeRecord.CodeSize << " and code index "
137                     << CodeRecord.CodeIndex << "\n");
138   CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
139                     CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
140           State->Pid,
141           Tid,
142           CodeRecord.Vma,
143           CodeRecord.CodeAddr,
144           CodeRecord.CodeSize,
145           CodeRecord.CodeIndex};
146   LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, "
147                     << CodeRecord.Name.size() + 1 << " bytes of name, "
148                     << CodeRecord.CodeSize << " bytes of code\n");
149   State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr));
150   State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
151   State->Dumpstream->write((const char *)CodeRecord.CodeAddr,
152                            CodeRecord.CodeSize);
153 }
154 
155 static void
156 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
157   assert(State && "PerfState not initialized");
158   dbgs() << "Writing unwind record with unwind data size "
159          << UnwindRecord.UnwindDataSize << " and EH frame header size "
160          << UnwindRecord.EHFrameHdrSize << " and mapped size "
161          << UnwindRecord.MappedSize << "\n";
162   UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
163                     UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
164           UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
165           UnwindRecord.MappedSize};
166   LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
167                     << UnwindRecord.EHFrameHdrSize
168                     << " bytes of EH frame header, "
169                     << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
170                     << " bytes of EH frame\n");
171   State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr));
172   if (UnwindRecord.EHFrameHdrAddr)
173     State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr,
174                              UnwindRecord.EHFrameHdrSize);
175   else
176     State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),
177                              UnwindRecord.EHFrameHdrSize);
178   State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr,
179                            UnwindRecord.UnwindDataSize -
180                                UnwindRecord.EHFrameHdrSize);
181 }
182 
183 static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
184   if (!State)
185     return make_error<StringError>("PerfState not initialized",
186                                    inconvertibleErrorCode());
187 
188   // Serialize the batch
189   std::lock_guard<std::mutex> Lock(Mutex);
190   if (Batch.UnwindingRecord.Prefix.TotalSize > 0)
191     writeUnwindRecord(Batch.UnwindingRecord);
192 
193   for (const auto &DebugInfo : Batch.DebugInfoRecords)
194     writeDebugRecord(DebugInfo);
195 
196   for (const auto &CodeLoad : Batch.CodeLoadRecords)
197     writeCodeRecord(CodeLoad);
198 
199   State->Dumpstream->flush();
200 
201   return Error::success();
202 }
203 
// File header of the dump file. FillMachine() fills it in and
// registerJITLoaderPerfStartImpl() writes it byte-for-byte as the first
// thing in the file.
struct Header {
  uint32_t Magic;     // characters "JiTD"
  uint32_t Version;   // header version
  uint32_t TotalSize; // total size of header
  uint32_t ElfMach;   // elf mach target
  uint32_t Pad1;      // reserved
  uint32_t Pid;       // id of the process writing the file
  uint64_t Timestamp; // timestamp
  uint64_t Flags;     // flags
};
214 
215 static Error OpenMarker(PerfState &State) {
216   // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
217   // is captured either live (perf record running when we mmap) or in deferred
218   // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
219   // file for more meta data info about the jitted code. Perf report/annotate
220   // detect this special filename and process the jitdump file.
221   //
222   // Mapping must be PROT_EXEC to ensure it is captured by perf record
223   // even when not using -d option.
224   State.MarkerAddr =
225       ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,
226              MAP_PRIVATE, State.DumpFd, 0);
227 
228   if (State.MarkerAddr == MAP_FAILED)
229     return make_error<llvm::StringError>("could not mmap JIT marker",
230                                          inconvertibleErrorCode());
231 
232   return Error::success();
233 }
234 
235 void CloseMarker(PerfState &State) {
236   if (!State.MarkerAddr)
237     return;
238 
239   munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate());
240   State.MarkerAddr = nullptr;
241 }
242 
243 static Expected<Header> FillMachine(PerfState &State) {
244   Header Hdr;
245   Hdr.Magic = LLVM_PERF_JIT_MAGIC;
246   Hdr.Version = LLVM_PERF_JIT_VERSION;
247   Hdr.TotalSize = sizeof(Hdr);
248   Hdr.Pid = State.Pid;
249   Hdr.Timestamp = perf_get_timestamp();
250 
251   char Id[16];
252   struct {
253     uint16_t e_type;
254     uint16_t e_machine;
255   } Info;
256 
257   size_t RequiredMemory = sizeof(Id) + sizeof(Info);
258 
259   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
260       MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
261 
262   // This'll not guarantee that enough data was actually read from the
263   // underlying file. Instead the trailing part of the buffer would be
264   // zeroed. Given the ELF signature check below that seems ok though,
265   // it's unlikely that the file ends just after that, and the
266   // consequence would just be that perf wouldn't recognize the
267   // signature.
268   if (!MB)
269     return make_error<llvm::StringError>("could not open /proc/self/exe",
270                                          MB.getError());
271 
272   memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id));
273   memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info));
274 
275   // check ELF signature
276   if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')
277     return make_error<llvm::StringError>("invalid ELF signature",
278                                          inconvertibleErrorCode());
279 
280   Hdr.ElfMach = Info.e_machine;
281 
282   return Hdr;
283 }
284 
285 static Error InitDebuggingDir(PerfState &State) {
286   time_t Time;
287   struct tm LocalTime;
288   char TimeBuffer[sizeof("YYYYMMDD")];
289   SmallString<64> Path;
290 
291   // search for location to dump data to
292   if (const char *BaseDir = getenv("JITDUMPDIR"))
293     Path.append(BaseDir);
294   else if (!sys::path::home_directory(Path))
295     Path = ".";
296 
297   // create debug directory
298   Path += "/.debug/jit/";
299   if (auto EC = sys::fs::create_directories(Path)) {
300     std::string ErrStr;
301     raw_string_ostream ErrStream(ErrStr);
302     ErrStream << "could not create jit cache directory " << Path << ": "
303               << EC.message() << "\n";
304     return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
305   }
306 
307   // create unique directory for dump data related to this process
308   time(&Time);
309   localtime_r(&Time, &LocalTime);
310   strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
311   Path += JIT_LANG "-jit-";
312   Path += TimeBuffer;
313 
314   SmallString<128> UniqueDebugDir;
315 
316   using sys::fs::createUniqueDirectory;
317   if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
318     std::string ErrStr;
319     raw_string_ostream ErrStream(ErrStr);
320     ErrStream << "could not create unique jit cache directory "
321               << UniqueDebugDir << ": " << EC.message() << "\n";
322     return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
323   }
324 
325   State.JitPath = std::string(UniqueDebugDir);
326 
327   return Error::success();
328 }
329 
330 static Error registerJITLoaderPerfStartImpl() {
331   PerfState Tentative;
332   Tentative.Pid = sys::Process::getProcessId();
333   // check if clock-source is supported
334   if (!perf_get_timestamp())
335     return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
336                                    inconvertibleErrorCode());
337 
338   if (auto Err = InitDebuggingDir(Tentative))
339     return Err;
340 
341   std::string Filename;
342   raw_string_ostream FilenameBuf(Filename);
343   FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";
344 
345   // Need to open ourselves, because we need to hand the FD to OpenMarker() and
346   // raw_fd_ostream doesn't expose the FD.
347   using sys::fs::openFileForWrite;
348   if (auto EC = openFileForReadWrite(Filename, Tentative.DumpFd,
349                                      sys::fs::CD_CreateNew, sys::fs::OF_None)) {
350     std::string ErrStr;
351     raw_string_ostream ErrStream(ErrStr);
352     ErrStream << "could not open JIT dump file " << Filename << ": "
353               << EC.message() << "\n";
354     return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
355   }
356 
357   Tentative.Dumpstream =
358       std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true);
359 
360   auto Header = FillMachine(Tentative);
361   if (!Header)
362     return Header.takeError();
363 
364   // signal this process emits JIT information
365   if (auto Err = OpenMarker(Tentative))
366     return Err;
367 
368   Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()),
369                               sizeof(*Header));
370 
371   // Everything initialized, can do profiling now.
372   if (Tentative.Dumpstream->has_error())
373     return make_error<StringError>("could not write JIT dump header",
374                                    inconvertibleErrorCode());
375 
376   State = std::move(Tentative);
377   return Error::success();
378 }
379 
380 static Error registerJITLoaderPerfEndImpl() {
381   if (!State)
382     return make_error<StringError>("PerfState not initialized",
383                                    inconvertibleErrorCode());
384 
385   RecHeader Close;
386   Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
387   Close.TotalSize = sizeof(Close);
388   Close.Timestamp = perf_get_timestamp();
389   State->Dumpstream->write(reinterpret_cast<const char *>(&Close),
390                            sizeof(Close));
391   if (State->MarkerAddr)
392     CloseMarker(*State);
393 
394   State.reset();
395   return Error::success();
396 }
397 
398 extern "C" llvm::orc::shared::CWrapperFunctionResult
399 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
400   using namespace orc::shared;
401   return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
402              Data, Size, registerJITLoaderPerfImpl)
403       .release();
404 }
405 
406 extern "C" llvm::orc::shared::CWrapperFunctionResult
407 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
408   using namespace orc::shared;
409   return WrapperFunction<SPSError()>::handle(Data, Size,
410                                              registerJITLoaderPerfStartImpl)
411       .release();
412 }
413 
414 extern "C" llvm::orc::shared::CWrapperFunctionResult
415 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
416   using namespace orc::shared;
417   return WrapperFunction<SPSError()>::handle(Data, Size,
418                                              registerJITLoaderPerfEndImpl)
419       .release();
420 }
421 
422 #else
423 
424 using namespace llvm;
425 using namespace llvm::orc;
426 
427 static Error badOS() {
428   using namespace llvm;
429   return llvm::make_error<StringError>(
430       "unsupported OS (perf support is only available on linux!)",
431       inconvertibleErrorCode());
432 }
433 
434 static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }
435 
436 extern "C" llvm::orc::shared::CWrapperFunctionResult
437 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
438   using namespace shared;
439   return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,
440                                                                   badOSBatch)
441       .release();
442 }
443 
444 extern "C" llvm::orc::shared::CWrapperFunctionResult
445 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
446   using namespace shared;
447   return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
448 }
449 
450 extern "C" llvm::orc::shared::CWrapperFunctionResult
451 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
452   using namespace shared;
453   return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
454 }
455 
456 #endif
457