xref: /llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp (revision 76e1521b0acff739c0425d0fcbb9360fc17f1af8)
1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Register objects for access by profilers via the perf JIT interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
14 
15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
16 
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/Process.h"
21 #include "llvm/Support/Threading.h"
22 
23 #include <mutex>
24 #include <optional>
25 
26 #ifdef __linux__
27 
28 #include <sys/mman.h> // mmap()
29 #include <time.h>     // clock_gettime(), time(), localtime_r() */
30 #include <unistd.h>   // for read(), close()
31 
#define DEBUG_TYPE "orc"

// Language tag that becomes part of the per-process dump directory name
// (XXX: should we generate something better from debug info?)
#define JIT_LANG "llvm-IR"
// File magic: the characters 'J', 'i', 'T', 'D' combined into one 32-bit
// value with 'J' in the most significant byte (0x4A695444).
#define LLVM_PERF_JIT_MAGIC                                                    \
  (static_cast<uint32_t>('J') << 24 | static_cast<uint32_t>('i') << 16 |       \
   static_cast<uint32_t>('T') << 8 | static_cast<uint32_t>('D'))
// Format version stored in the dump file header.
#define LLVM_PERF_JIT_VERSION 1
41 
42 using namespace llvm;
43 using namespace llvm::orc;
44 
45 struct PerfState {
46   // cache lookups
47   uint32_t Pid;
48 
49   // base directory for output data
50   std::string JitPath;
51 
52   // output data stream, closed via Dumpstream
53   int DumpFd = -1;
54 
55   // output data stream
56   std::unique_ptr<raw_fd_ostream> Dumpstream;
57 
58   // perf mmap marker
59   void *MarkerAddr = NULL;
60 };
61 
62 // prevent concurrent dumps from messing up the output file
63 static std::mutex Mutex;
64 static std::optional<PerfState> state;
65 
/// Prefix shared by every record written to the dump file. Instances are
/// written to the stream as raw bytes, so the layout must stay fixed.
struct RecHeader {
  uint32_t Id;        // record type identifier
  uint32_t TotalSize; // size in bytes of the whole record
  uint64_t Timestamp; // value of perf_get_timestamp() when the record is built
};
static_assert(sizeof(RecHeader) == 16,
              "RecHeader is dumped verbatim and must not contain padding");
71 
72 struct DIR {
73   RecHeader Prefix;
74   uint64_t CodeAddr;
75   uint64_t NrEntry;
76 };
77 
/// Fixed-size part of one debug-info entry; the entry's NUL-terminated name
/// is written directly after it. Written to the stream as raw bytes, so the
/// layout must stay fixed.
struct DIE {
  uint64_t CodeAddr; // address the entry refers to
  uint32_t Line;     // line number
  uint32_t Discrim;  // discriminator
};
static_assert(sizeof(DIE) == 16,
              "DIE is dumped verbatim and must not contain padding");
83 
84 struct CLR {
85   RecHeader Prefix;
86   uint32_t Pid;
87   uint32_t Tid;
88   uint64_t Vma;
89   uint64_t CodeAddr;
90   uint64_t CodeSize;
91   uint64_t CodeIndex;
92 };
93 
94 struct UWR {
95   RecHeader Prefix;
96   uint64_t UnwindDataSize;
97   uint64_t EhFrameHeaderSize;
98   uint64_t MappedSize;
99 };
100 
/// Flattens a timespec into a single nanosecond count.
static inline uint64_t timespec_to_ns(const struct timespec *ts) {
  constexpr uint64_t NsPerSecond = 1000 * 1000 * 1000;
  const uint64_t WholeSeconds = static_cast<uint64_t>(ts->tv_sec);
  const uint64_t Remainder = static_cast<uint64_t>(ts->tv_nsec);
  return WholeSeconds * NsPerSecond + Remainder;
}
105 
106 static inline uint64_t perf_get_timestamp() {
107   struct timespec ts;
108   int ret;
109 
110   ret = clock_gettime(CLOCK_MONOTONIC, &ts);
111   if (ret)
112     return 0;
113 
114   return timespec_to_ns(&ts);
115 }
116 
117 static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
118   assert(state && "PerfState not initialized");
119   LLVM_DEBUG(dbgs() << "Writing debug record with "
120                     << DebugRecord.Entries.size() << " entries\n");
121   size_t Written = 0;
122   DIR dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
123                     DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
124           DebugRecord.CodeAddr, DebugRecord.Entries.size()};
125   state->Dumpstream->write(reinterpret_cast<const char *>(&dir), sizeof(dir));
126   Written += sizeof(dir);
127   for (auto &die : DebugRecord.Entries) {
128     DIE d{die.Addr, die.Lineno, die.Discrim};
129     state->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d));
130     state->Dumpstream->write(die.Name.data(), die.Name.size() + 1);
131     Written += sizeof(d) + die.Name.size() + 1;
132   }
133   LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
134 }
135 
136 static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
137   assert(state && "PerfState not initialized");
138   uint32_t Tid = get_threadid();
139   LLVM_DEBUG(dbgs() << "Writing code record with code size "
140                     << CodeRecord.CodeSize << " and code index "
141                     << CodeRecord.CodeIndex << "\n");
142   CLR clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
143                     CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
144           state->Pid,
145           Tid,
146           CodeRecord.Vma,
147           CodeRecord.CodeAddr,
148           CodeRecord.CodeSize,
149           CodeRecord.CodeIndex};
150   LLVM_DEBUG(dbgs() << "wrote " << sizeof(clr) << " bytes of CLR, "
151                     << CodeRecord.Name.size() + 1 << " bytes of name, "
152                     << CodeRecord.CodeSize << " bytes of code\n");
153   state->Dumpstream->write(reinterpret_cast<const char *>(&clr), sizeof(clr));
154   state->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
155   state->Dumpstream->write((const char *)CodeRecord.CodeAddr,
156                            CodeRecord.CodeSize);
157 }
158 
159 static void
160 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
161   assert(state && "PerfState not initialized");
162   dbgs() << "Writing unwind record with unwind data size "
163          << UnwindRecord.UnwindDataSize << " and EH frame header size "
164          << UnwindRecord.EHFrameHdrSize << " and mapped size "
165          << UnwindRecord.MappedSize << "\n";
166   UWR uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
167                     UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
168           UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
169           UnwindRecord.MappedSize};
170   LLVM_DEBUG(dbgs() << "wrote " << sizeof(uwr) << " bytes of UWR, "
171                     << UnwindRecord.EHFrameHdrSize
172                     << " bytes of EH frame header, "
173                     << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
174                     << " bytes of EH frame\n");
175   state->Dumpstream->write(reinterpret_cast<const char *>(&uwr), sizeof(uwr));
176   if (UnwindRecord.EHFrameHdrAddr) {
177     state->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr,
178                              UnwindRecord.EHFrameHdrSize);
179   } else {
180     state->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),
181                              UnwindRecord.EHFrameHdrSize);
182   }
183   state->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr,
184                            UnwindRecord.UnwindDataSize -
185                                UnwindRecord.EHFrameHdrSize);
186 }
187 
188 static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
189   if (!state) {
190     return make_error<StringError>("PerfState not initialized",
191                                    inconvertibleErrorCode());
192   }
193 
194   // Serialize the batch
195   std::lock_guard<std::mutex> Lock(Mutex);
196   if (Batch.UnwindingRecord.Prefix.TotalSize > 0) {
197     writeUnwindRecord(Batch.UnwindingRecord);
198   }
199   for (const auto &DebugInfo : Batch.DebugInfoRecords) {
200     writeDebugRecord(DebugInfo);
201   }
202   for (const auto &CodeLoad : Batch.CodeLoadRecords) {
203     writeCodeRecord(CodeLoad);
204   }
205 
206   state->Dumpstream->flush();
207 
208   return Error::success();
209 }
210 
/// File header written once at the start of the dump file. It is written to
/// the stream as raw bytes, so the layout must stay fixed and every field,
/// including the reserved ones, needs a defined value.
struct Header {
  uint32_t Magic = 0;     // characters "JiTD"
  uint32_t Version = 0;   // header version
  uint32_t TotalSize = 0; // total size of header
  uint32_t ElfMach = 0;   // elf mach target
  uint32_t Pad1 = 0;      // reserved
  uint32_t Pid = 0;
  uint64_t Timestamp = 0; // timestamp
  uint64_t Flags = 0;     // flags
};
static_assert(sizeof(Header) == 40,
              "Header is dumped verbatim and must not contain padding");
221 
222 static Error OpenMarker(PerfState &state) {
223   // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
224   // is captured either live (perf record running when we mmap) or in deferred
225   // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
226   // file for more meta data info about the jitted code. Perf report/annotate
227   // detect this special filename and process the jitdump file.
228   //
229   // Mapping must be PROT_EXEC to ensure it is captured by perf record
230   // even when not using -d option.
231   state.MarkerAddr =
232       ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,
233              MAP_PRIVATE, state.DumpFd, 0);
234 
235   if (state.MarkerAddr == MAP_FAILED) {
236     return make_error<llvm::StringError>("could not mmap JIT marker",
237                                          inconvertibleErrorCode());
238   }
239   return Error::success();
240 }
241 
242 void CloseMarker(PerfState &state) {
243   if (!state.MarkerAddr)
244     return;
245 
246   munmap(state.MarkerAddr, sys::Process::getPageSizeEstimate());
247   state.MarkerAddr = nullptr;
248 }
249 
250 static Expected<Header> FillMachine(PerfState &state) {
251   Header hdr;
252   hdr.Magic = LLVM_PERF_JIT_MAGIC;
253   hdr.Version = LLVM_PERF_JIT_VERSION;
254   hdr.TotalSize = sizeof(hdr);
255   hdr.Pid = state.Pid;
256   hdr.Timestamp = perf_get_timestamp();
257 
258   char id[16];
259   struct {
260     uint16_t e_type;
261     uint16_t e_machine;
262   } info;
263 
264   size_t RequiredMemory = sizeof(id) + sizeof(info);
265 
266   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
267       MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
268 
269   // This'll not guarantee that enough data was actually read from the
270   // underlying file. Instead the trailing part of the buffer would be
271   // zeroed. Given the ELF signature check below that seems ok though,
272   // it's unlikely that the file ends just after that, and the
273   // consequence would just be that perf wouldn't recognize the
274   // signature.
275   if (!MB) {
276     return make_error<llvm::StringError>("could not open /proc/self/exe",
277                                          MB.getError());
278   }
279 
280   memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
281   memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
282 
283   // check ELF signature
284   if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
285     return make_error<llvm::StringError>("invalid ELF signature",
286                                          inconvertibleErrorCode());
287   }
288 
289   hdr.ElfMach = info.e_machine;
290 
291   return hdr;
292 }
293 
294 static Error InitDebuggingDir(PerfState &state) {
295   time_t Time;
296   struct tm LocalTime;
297   char TimeBuffer[sizeof("YYYYMMDD")];
298   SmallString<64> Path;
299 
300   // search for location to dump data to
301   if (const char *BaseDir = getenv("JITDUMPDIR"))
302     Path.append(BaseDir);
303   else if (!sys::path::home_directory(Path))
304     Path = ".";
305 
306   // create debug directory
307   Path += "/.debug/jit/";
308   if (auto EC = sys::fs::create_directories(Path)) {
309     std::string errstr;
310     raw_string_ostream errstream(errstr);
311     errstream << "could not create jit cache directory " << Path << ": "
312               << EC.message() << "\n";
313     return make_error<StringError>(std::move(errstr), inconvertibleErrorCode());
314   }
315 
316   // create unique directory for dump data related to this process
317   time(&Time);
318   localtime_r(&Time, &LocalTime);
319   strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
320   Path += JIT_LANG "-jit-";
321   Path += TimeBuffer;
322 
323   SmallString<128> UniqueDebugDir;
324 
325   using sys::fs::createUniqueDirectory;
326   if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
327     std::string errstr;
328     raw_string_ostream errstream(errstr);
329     errstream << "could not create unique jit cache directory "
330               << UniqueDebugDir << ": " << EC.message() << "\n";
331     return make_error<StringError>(std::move(errstr), inconvertibleErrorCode());
332   }
333 
334   state.JitPath = std::string(UniqueDebugDir.str());
335 
336   return Error::success();
337 }
338 
339 static Error registerJITLoaderPerfStartImpl() {
340   PerfState tentative;
341   tentative.Pid = sys::Process::getProcessId();
342   // check if clock-source is supported
343   if (!perf_get_timestamp()) {
344     return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
345                                    inconvertibleErrorCode());
346   }
347 
348   if (auto err = InitDebuggingDir(tentative)) {
349     return std::move(err);
350   }
351 
352   std::string Filename;
353   raw_string_ostream FilenameBuf(Filename);
354   FilenameBuf << tentative.JitPath << "/jit-" << tentative.Pid << ".dump";
355 
356   // Need to open ourselves, because we need to hand the FD to OpenMarker() and
357   // raw_fd_ostream doesn't expose the FD.
358   using sys::fs::openFileForWrite;
359   if (auto EC = openFileForReadWrite(FilenameBuf.str(), tentative.DumpFd,
360                                      sys::fs::CD_CreateNew, sys::fs::OF_None)) {
361     std::string errstr;
362     raw_string_ostream errstream(errstr);
363     errstream << "could not open JIT dump file " << FilenameBuf.str() << ": "
364               << EC.message() << "\n";
365     return make_error<StringError>(std::move(errstr), inconvertibleErrorCode());
366   }
367 
368   tentative.Dumpstream =
369       std::make_unique<raw_fd_ostream>(tentative.DumpFd, true);
370 
371   auto header = FillMachine(tentative);
372   if (!header) {
373     return header.takeError();
374   }
375 
376   // signal this process emits JIT information
377   if (auto err = OpenMarker(tentative)) {
378     return std::move(err);
379   }
380 
381   tentative.Dumpstream->write(reinterpret_cast<const char *>(&header.get()),
382                               sizeof(*header));
383 
384   // Everything initialized, can do profiling now.
385   if (tentative.Dumpstream->has_error()) {
386     return make_error<StringError>("could not write JIT dump header",
387                                    inconvertibleErrorCode());
388   }
389   state = std::move(tentative);
390   return Error::success();
391 }
392 
393 static Error registerJITLoaderPerfEndImpl() {
394   if (!state) {
395     return make_error<StringError>("PerfState not initialized",
396                                    inconvertibleErrorCode());
397   }
398   RecHeader close;
399   close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
400   close.TotalSize = sizeof(close);
401   close.Timestamp = perf_get_timestamp();
402   state->Dumpstream->write(reinterpret_cast<const char *>(&close),
403                            sizeof(close));
404   if (state->MarkerAddr) {
405     CloseMarker(*state);
406   }
407   state.reset();
408   return Error::success();
409 }
410 
411 extern "C" llvm::orc::shared::CWrapperFunctionResult
412 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
413   using namespace orc::shared;
414   return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
415              Data, Size, registerJITLoaderPerfImpl)
416       .release();
417 }
418 
419 extern "C" llvm::orc::shared::CWrapperFunctionResult
420 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
421   using namespace orc::shared;
422   return WrapperFunction<SPSError()>::handle(Data, Size,
423                                              registerJITLoaderPerfStartImpl)
424       .release();
425 }
426 
427 extern "C" llvm::orc::shared::CWrapperFunctionResult
428 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
429   using namespace orc::shared;
430   return WrapperFunction<SPSError()>::handle(Data, Size,
431                                              registerJITLoaderPerfEndImpl)
432       .release();
433 }
434 
435 #else
436 
437 static Error badOS() {
438   return make_error<StringError>(
439       "unsupported OS (perf support is only available on linux!)",
440       inconvertibleErrorCode());
441 }
442 
443 static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }
444 
445 extern "C" llvm::orc::shared::CWrapperFunctionResult
446 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
447   using namespace orc::shared;
448   return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,
449                                                                   badOSBatch)
450       .release();
451 }
452 
453 extern "C" llvm::orc::shared::CWrapperFunctionResult
454 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
455   using namespace orc::shared;
456   return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
457 }
458 
459 extern "C" llvm::orc::shared::CWrapperFunctionResult
460 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
461   using namespace orc::shared;
462   return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
463 }
464 
465 #endif