xref: /llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp (revision a75565a684feaed48cf92284c5901aaa08fd91ad)
1 //===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a JITEventListener object that tells perf about JITted
10 // functions, including source line information.
11 //
12 // Documentation for perf jit integration is available at:
13 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
14 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Config/config.h"
20 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
21 #include "llvm/ExecutionEngine/JITEventListener.h"
22 #include "llvm/Object/ObjectFile.h"
23 #include "llvm/Object/SymbolSize.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Errno.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Mutex.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Threading.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <mutex>
34 
35 #include <sys/mman.h>  // mmap()
36 #include <time.h>      // clock_gettime(), time(), localtime_r() */
37 #include <unistd.h>    // for read(), close()
38 
39 using namespace llvm;
40 using namespace llvm::object;
41 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
42 
43 namespace {
44 
45 // language identifier (XXX: should we generate something better from debug
46 // info?)
47 #define JIT_LANG "llvm-IR"
48 #define LLVM_PERF_JIT_MAGIC                                                    \
49   ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
50    (uint32_t)'D')
51 #define LLVM_PERF_JIT_VERSION 1
52 
53 // bit 0: set if the jitdump file is using an architecture-specific timestamp
54 // clock source
55 #define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
56 
57 struct LLVMPerfJitHeader;
58 
59 class PerfJITEventListener : public JITEventListener {
60 public:
61   PerfJITEventListener();
62   ~PerfJITEventListener() {
63     // Lock a mutex to correctly synchronize with prior calls to
64     // `notifyObjectLoaded` and `notifyFreeingObject` that happened on other
65     // threads to prevent tsan from complaining.
66     std::lock_guard<sys::Mutex> Guard(Mutex);
67     if (MarkerAddr)
68       CloseMarker();
69   }
70 
71   void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
72                           const RuntimeDyld::LoadedObjectInfo &L) override;
73   void notifyFreeingObject(ObjectKey K) override;
74 
75 private:
76   bool InitDebuggingDir();
77   bool OpenMarker();
78   void CloseMarker();
79   static bool FillMachine(LLVMPerfJitHeader &hdr);
80 
81   void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
82                   uint64_t CodeSize);
83   void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
84 
85   // cache lookups
86   sys::Process::Pid Pid;
87 
88   // base directory for output data
89   std::string JitPath;
90 
91   // output data stream, closed via Dumpstream
92   int DumpFd = -1;
93 
94   // output data stream
95   std::unique_ptr<raw_fd_ostream> Dumpstream;
96 
97   // prevent concurrent dumps from messing up the output file
98   sys::Mutex Mutex;
99 
100   // perf mmap marker
101   void *MarkerAddr = NULL;
102 
103   // perf support ready
104   bool SuccessfullyInitialized = false;
105 
106   // identifier for functions, primarily to identify when moving them around
107   uint64_t CodeGeneration = 1;
108 };
109 
110 // The following are POD struct definitions from the perf jit specification
111 
112 enum LLVMPerfJitRecordType {
113   JIT_CODE_LOAD = 0,
114   JIT_CODE_MOVE = 1, // not emitted, code isn't moved
115   JIT_CODE_DEBUG_INFO = 2,
116   JIT_CODE_CLOSE = 3,          // not emitted, unnecessary
117   JIT_CODE_UNWINDING_INFO = 4, // not emitted
118 
119   JIT_CODE_MAX
120 };
121 
122 struct LLVMPerfJitHeader {
123   uint32_t Magic;     // characters "JiTD"
124   uint32_t Version;   // header version
125   uint32_t TotalSize; // total size of header
126   uint32_t ElfMach;   // elf mach target
127   uint32_t Pad1;      // reserved
128   uint32_t Pid;
129   uint64_t Timestamp; // timestamp
130   uint64_t Flags;     // flags
131 };
132 
133 // record prefix (mandatory in each record)
134 struct LLVMPerfJitRecordPrefix {
135   uint32_t Id; // record type identifier
136   uint32_t TotalSize;
137   uint64_t Timestamp;
138 };
139 
140 struct LLVMPerfJitRecordCodeLoad {
141   LLVMPerfJitRecordPrefix Prefix;
142 
143   uint32_t Pid;
144   uint32_t Tid;
145   uint64_t Vma;
146   uint64_t CodeAddr;
147   uint64_t CodeSize;
148   uint64_t CodeIndex;
149 };
150 
151 struct LLVMPerfJitDebugEntry {
152   uint64_t Addr;
153   int Lineno;  // source line number starting at 1
154   int Discrim; // column discriminator, 0 is default
155   // followed by null terminated filename, \xff\0 if same as previous entry
156 };
157 
158 struct LLVMPerfJitRecordDebugInfo {
159   LLVMPerfJitRecordPrefix Prefix;
160 
161   uint64_t CodeAddr;
162   uint64_t NrEntry;
163   // followed by NrEntry LLVMPerfJitDebugEntry records
164 };
165 
166 static inline uint64_t timespec_to_ns(const struct timespec *ts) {
167   const uint64_t NanoSecPerSec = 1000000000;
168   return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
169 }
170 
171 static inline uint64_t perf_get_timestamp(void) {
172   struct timespec ts;
173   int ret;
174 
175   ret = clock_gettime(CLOCK_MONOTONIC, &ts);
176   if (ret)
177     return 0;
178 
179   return timespec_to_ns(&ts);
180 }
181 
182 PerfJITEventListener::PerfJITEventListener()
183     : Pid(sys::Process::getProcessId()) {
184   // check if clock-source is supported
185   if (!perf_get_timestamp()) {
186     errs() << "kernel does not support CLOCK_MONOTONIC\n";
187     return;
188   }
189 
190   if (!InitDebuggingDir()) {
191     errs() << "could not initialize debugging directory\n";
192     return;
193   }
194 
195   std::string Filename;
196   raw_string_ostream FilenameBuf(Filename);
197   FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
198 
199   // Need to open ourselves, because we need to hand the FD to OpenMarker() and
200   // raw_fd_ostream doesn't expose the FD.
201   using sys::fs::openFileForWrite;
202   if (auto EC = openFileForReadWrite(Filename, DumpFd, sys::fs::CD_CreateNew,
203                                      sys::fs::OF_None)) {
204     errs() << "could not open JIT dump file " << Filename << ": "
205            << EC.message() << "\n";
206     return;
207   }
208 
209   Dumpstream = std::make_unique<raw_fd_ostream>(DumpFd, true);
210 
211   LLVMPerfJitHeader Header = {0, 0, 0, 0, 0, 0, 0, 0};
212   if (!FillMachine(Header))
213     return;
214 
215   // signal this process emits JIT information
216   if (!OpenMarker())
217     return;
218 
219   // emit dumpstream header
220   Header.Magic = LLVM_PERF_JIT_MAGIC;
221   Header.Version = LLVM_PERF_JIT_VERSION;
222   Header.TotalSize = sizeof(Header);
223   Header.Pid = Pid;
224   Header.Timestamp = perf_get_timestamp();
225   Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
226 
227   // Everything initialized, can do profiling now.
228   if (!Dumpstream->has_error())
229     SuccessfullyInitialized = true;
230 }
231 
232 void PerfJITEventListener::notifyObjectLoaded(
233     ObjectKey K, const ObjectFile &Obj,
234     const RuntimeDyld::LoadedObjectInfo &L) {
235 
236   if (!SuccessfullyInitialized)
237     return;
238 
239   OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
240   const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
241 
242   // Get the address of the object image for use as a unique identifier
243   std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
244 
245   // Use symbol info to iterate over functions in the object.
246   for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
247     SymbolRef Sym = P.first;
248     std::string SourceFileName;
249 
250     Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
251     if (!SymTypeOrErr) {
252       // There's not much we can with errors here
253       consumeError(SymTypeOrErr.takeError());
254       continue;
255     }
256     SymbolRef::Type SymType = *SymTypeOrErr;
257     if (SymType != SymbolRef::ST_Function)
258       continue;
259 
260     Expected<StringRef> Name = Sym.getName();
261     if (!Name) {
262       consumeError(Name.takeError());
263       continue;
264     }
265 
266     Expected<uint64_t> AddrOrErr = Sym.getAddress();
267     if (!AddrOrErr) {
268       consumeError(AddrOrErr.takeError());
269       continue;
270     }
271     uint64_t Size = P.second;
272     object::SectionedAddress Address;
273     Address.Address = *AddrOrErr;
274 
275     uint64_t SectionIndex = object::SectionedAddress::UndefSection;
276     if (auto SectOrErr = Sym.getSection())
277         if (*SectOrErr != Obj.section_end())
278             SectionIndex = SectOrErr.get()->getIndex();
279 
280     // According to spec debugging info has to come before loading the
281     // corresponding code load.
282     DILineInfoTable Lines = Context->getLineInfoForAddressRange(
283         {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
284 
285     NotifyDebug(*AddrOrErr, Lines);
286     NotifyCode(Name, *AddrOrErr, Size);
287   }
288 
289   // avoid races with writes
290   std::lock_guard<sys::Mutex> Guard(Mutex);
291 
292   Dumpstream->flush();
293 }
294 
295 void PerfJITEventListener::notifyFreeingObject(ObjectKey K) {
296   // perf currently doesn't have an interface for unloading. But munmap()ing the
297   // code section does, so that's ok.
298 }
299 
300 bool PerfJITEventListener::InitDebuggingDir() {
301   time_t Time;
302   struct tm LocalTime;
303   char TimeBuffer[sizeof("YYYYMMDD")];
304   SmallString<64> Path;
305 
306   // search for location to dump data to
307   if (const char *BaseDir = getenv("JITDUMPDIR"))
308     Path.append(BaseDir);
309   else if (!sys::path::home_directory(Path))
310     Path = ".";
311 
312   // create debug directory
313   Path += "/.debug/jit/";
314   if (auto EC = sys::fs::create_directories(Path)) {
315     errs() << "could not create jit cache directory " << Path << ": "
316            << EC.message() << "\n";
317     return false;
318   }
319 
320   // create unique directory for dump data related to this process
321   time(&Time);
322   localtime_r(&Time, &LocalTime);
323   strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
324   Path += JIT_LANG "-jit-";
325   Path += TimeBuffer;
326 
327   SmallString<128> UniqueDebugDir;
328 
329   using sys::fs::createUniqueDirectory;
330   if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
331     errs() << "could not create unique jit cache directory " << UniqueDebugDir
332            << ": " << EC.message() << "\n";
333     return false;
334   }
335 
336   JitPath = std::string(UniqueDebugDir.str());
337 
338   return true;
339 }
340 
341 bool PerfJITEventListener::OpenMarker() {
342   // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
343   // is captured either live (perf record running when we mmap) or in deferred
344   // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
345   // file for more meta data info about the jitted code. Perf report/annotate
346   // detect this special filename and process the jitdump file.
347   //
348   // Mapping must be PROT_EXEC to ensure it is captured by perf record
349   // even when not using -d option.
350   MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(),
351                       PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0);
352 
353   if (MarkerAddr == MAP_FAILED) {
354     errs() << "could not mmap JIT marker\n";
355     return false;
356   }
357   return true;
358 }
359 
360 void PerfJITEventListener::CloseMarker() {
361   if (!MarkerAddr)
362     return;
363 
364   munmap(MarkerAddr, sys::Process::getPageSizeEstimate());
365   MarkerAddr = nullptr;
366 }
367 
368 bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
369   char id[16];
370   struct {
371     uint16_t e_type;
372     uint16_t e_machine;
373   } info;
374 
375   size_t RequiredMemory = sizeof(id) + sizeof(info);
376 
377   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
378     MemoryBuffer::getFileSlice("/proc/self/exe",
379 			       RequiredMemory,
380 			       0);
381 
382   // This'll not guarantee that enough data was actually read from the
383   // underlying file. Instead the trailing part of the buffer would be
384   // zeroed. Given the ELF signature check below that seems ok though,
385   // it's unlikely that the file ends just after that, and the
386   // consequence would just be that perf wouldn't recognize the
387   // signature.
388   if (auto EC = MB.getError()) {
389     errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
390     return false;
391   }
392 
393   memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
394   memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
395 
396   // check ELF signature
397   if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
398     errs() << "invalid elf signature\n";
399     return false;
400   }
401 
402   hdr.ElfMach = info.e_machine;
403 
404   return true;
405 }
406 
407 void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
408                                       uint64_t CodeAddr, uint64_t CodeSize) {
409   assert(SuccessfullyInitialized);
410 
411   // 0 length functions can't have samples.
412   if (CodeSize == 0)
413     return;
414 
415   LLVMPerfJitRecordCodeLoad rec;
416   rec.Prefix.Id = JIT_CODE_LOAD;
417   rec.Prefix.TotalSize = sizeof(rec) +        // debug record itself
418                          Symbol->size() + 1 + // symbol name
419                          CodeSize;            // and code
420   rec.Prefix.Timestamp = perf_get_timestamp();
421 
422   rec.CodeSize = CodeSize;
423   rec.Vma = CodeAddr;
424   rec.CodeAddr = CodeAddr;
425   rec.Pid = Pid;
426   rec.Tid = get_threadid();
427 
428   // avoid interspersing output
429   std::lock_guard<sys::Mutex> Guard(Mutex);
430 
431   rec.CodeIndex = CodeGeneration++; // under lock!
432 
433   Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
434   Dumpstream->write(Symbol->data(), Symbol->size() + 1);
435   Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
436 }
437 
438 void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
439                                        DILineInfoTable Lines) {
440   assert(SuccessfullyInitialized);
441 
442   // Didn't get useful debug info.
443   if (Lines.empty())
444     return;
445 
446   LLVMPerfJitRecordDebugInfo rec;
447   rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
448   rec.Prefix.TotalSize = sizeof(rec); // will be increased further
449   rec.Prefix.Timestamp = perf_get_timestamp();
450   rec.CodeAddr = CodeAddr;
451   rec.NrEntry = Lines.size();
452 
453   // compute total size of record (variable due to filenames)
454   DILineInfoTable::iterator Begin = Lines.begin();
455   DILineInfoTable::iterator End = Lines.end();
456   for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
457     DILineInfo &line = It->second;
458     rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
459     rec.Prefix.TotalSize += line.FileName.size() + 1;
460   }
461 
462   // The debug_entry describes the source line information. It is defined as
463   // follows in order:
464   // * uint64_t code_addr: address of function for which the debug information
465   // is generated
466   // * uint32_t line     : source file line number (starting at 1)
467   // * uint32_t discrim  : column discriminator, 0 is default
468   // * char name[n]      : source file name in ASCII, including null termination
469 
470   // avoid interspersing output
471   std::lock_guard<sys::Mutex> Guard(Mutex);
472 
473   Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
474 
475   for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
476     LLVMPerfJitDebugEntry LineInfo;
477     DILineInfo &Line = It->second;
478 
479     LineInfo.Addr = It->first;
480     // The function re-created by perf is preceded by a elf
481     // header. Need to adjust for that, otherwise the results are
482     // wrong.
483     LineInfo.Addr += 0x40;
484     LineInfo.Lineno = Line.Line;
485     LineInfo.Discrim = Line.Discriminator;
486 
487     Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
488                       sizeof(LineInfo));
489     Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
490   }
491 }
492 
493 } // end anonymous namespace
494 
495 namespace llvm {
496 JITEventListener *JITEventListener::createPerfJITEventListener() {
497   // There should be only a single event listener per process, otherwise perf
498   // gets confused.
499   static PerfJITEventListener PerfListener;
500   return &PerfListener;
501 }
502 
503 } // namespace llvm
504 
505 LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void)
506 {
507   return wrap(JITEventListener::createPerfJITEventListener());
508 }
509