1 //===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a JITEventListener object that tells perf about JITted
10 // functions, including source line information.
11 //
12 // Documentation for perf jit integration is available at:
13 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
14 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Config/config.h"
20 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
21 #include "llvm/ExecutionEngine/JITEventListener.h"
22 #include "llvm/Object/ObjectFile.h"
23 #include "llvm/Object/SymbolSize.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Errno.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Mutex.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Threading.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <mutex>
34
35 #include <sys/mman.h> // mmap()
36 #include <time.h> // clock_gettime(), time(), localtime_r() */
37 #include <unistd.h> // for read(), close()
38
39 using namespace llvm;
40 using namespace llvm::object;
41 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
42
43 namespace {
44
// language identifier (XXX: should we generate something better from debug
// info?). InitDebuggingDir() uses it as the leading part of the name of the
// per-process dump directory.
#define JIT_LANG "llvm-IR"
// Magic number stored in LLVMPerfJitHeader::Magic: the characters 'J', 'i',
// 'T', 'D' packed from the most significant byte to the least significant.
#define LLVM_PERF_JIT_MAGIC \
  ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
   (uint32_t)'D')
// Value stored in LLVMPerfJitHeader::Version.
#define LLVM_PERF_JIT_VERSION 1

// bit 0: set if the jitdump file is using an architecture-specific timestamp
// clock source
#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)

// Defined further down; declared here so that the listener class can declare
// FillMachine(), which takes the header by reference.
struct LLVMPerfJitHeader;
58
59 class PerfJITEventListener : public JITEventListener {
60 public:
61 PerfJITEventListener();
~PerfJITEventListener()62 ~PerfJITEventListener() {
63 if (MarkerAddr)
64 CloseMarker();
65 }
66
67 void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
68 const RuntimeDyld::LoadedObjectInfo &L) override;
69 void notifyFreeingObject(ObjectKey K) override;
70
71 private:
72 bool InitDebuggingDir();
73 bool OpenMarker();
74 void CloseMarker();
75 static bool FillMachine(LLVMPerfJitHeader &hdr);
76
77 void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
78 uint64_t CodeSize);
79 void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
80
81 // cache lookups
82 sys::Process::Pid Pid;
83
84 // base directory for output data
85 std::string JitPath;
86
87 // output data stream, closed via Dumpstream
88 int DumpFd = -1;
89
90 // output data stream
91 std::unique_ptr<raw_fd_ostream> Dumpstream;
92
93 // prevent concurrent dumps from messing up the output file
94 sys::Mutex Mutex;
95
96 // perf mmap marker
97 void *MarkerAddr = NULL;
98
99 // perf support ready
100 bool SuccessfullyInitialized = false;
101
102 // identifier for functions, primarily to identify when moving them around
103 uint64_t CodeGeneration = 1;
104 };
105
106 // The following are POD struct definitions from the perf jit specification
107
// Record type identifiers, stored in LLVMPerfJitRecordPrefix::Id. This file
// only writes JIT_CODE_LOAD and JIT_CODE_DEBUG_INFO records.
enum LLVMPerfJitRecordType {
  JIT_CODE_LOAD = 0,
  JIT_CODE_MOVE = 1, // not emitted, code isn't moved
  JIT_CODE_DEBUG_INFO = 2,
  JIT_CODE_CLOSE = 3, // not emitted, unnecessary
  JIT_CODE_UNWINDING_INFO = 4, // not emitted

  JIT_CODE_MAX // one past the last record type listed above
};
117
// Header written once at the start of the dump file by the listener's
// constructor. The struct is written to the file byte-for-byte, so the field
// order and widths are part of the file format.
struct LLVMPerfJitHeader {
  uint32_t Magic; // characters "JiTD"
  uint32_t Version; // header version
  uint32_t TotalSize; // total size of header
  uint32_t ElfMach; // elf mach target
  uint32_t Pad1; // reserved
  uint32_t Pid; // id of the process writing the file
  uint64_t Timestamp; // timestamp
  uint64_t Flags; // flags; left at zero by this file
};
128
// record prefix (mandatory in each record)
struct LLVMPerfJitRecordPrefix {
  uint32_t Id; // record type identifier
  uint32_t TotalSize; // size of the whole record, including this prefix and
                      // any variable-length data that follows the struct
  uint64_t Timestamp; // value of perf_get_timestamp() when the record was built
};
135
// Fixed-size part of a JIT_CODE_LOAD record. NotifyCode() writes it followed
// by the null terminated symbol name and then the code bytes.
struct LLVMPerfJitRecordCodeLoad {
  LLVMPerfJitRecordPrefix Prefix;

  uint32_t Pid; // process id
  uint32_t Tid; // id of the thread that emitted the record
  uint64_t Vma; // always written as 0 by this file
  uint64_t CodeAddr; // address of the function's code
  uint64_t CodeSize; // size of the function's code in bytes
  uint64_t CodeIndex; // running identifier taken from CodeGeneration
};
146
// Fixed-size part of one line-table entry inside a JIT_CODE_DEBUG_INFO record.
struct LLVMPerfJitDebugEntry {
  uint64_t Addr; // code address the entry applies to
  int Lineno; // source line number starting at 1
  int Discrim; // column discriminator, 0 is default
  // followed by null terminated filename, \xff\0 if same as previous entry
  // NOTE(review): NotifyDebug() always writes the full filename and never
  // uses the \xff\0 shorthand.
};
153
// Fixed-size part of a JIT_CODE_DEBUG_INFO record, written by NotifyDebug().
struct LLVMPerfJitRecordDebugInfo {
  LLVMPerfJitRecordPrefix Prefix;

  uint64_t CodeAddr; // address of the function the entries belong to
  uint64_t NrEntry; // number of entries that follow
  // followed by NrEntry LLVMPerfJitDebugEntry records
};
161
/// Flattens a timespec into a single nanosecond count:
/// tv_sec * 10^9 + tv_nsec.
static inline uint64_t timespec_to_ns(const struct timespec *ts) {
  constexpr uint64_t NsPerSecond = 1000000000;
  const uint64_t WholeSeconds = static_cast<uint64_t>(ts->tv_sec);
  return WholeSeconds * NsPerSecond + ts->tv_nsec;
}
166
perf_get_timestamp(void)167 static inline uint64_t perf_get_timestamp(void) {
168 struct timespec ts;
169 int ret;
170
171 ret = clock_gettime(CLOCK_MONOTONIC, &ts);
172 if (ret)
173 return 0;
174
175 return timespec_to_ns(&ts);
176 }
177
PerfJITEventListener()178 PerfJITEventListener::PerfJITEventListener()
179 : Pid(sys::Process::getProcessId()) {
180 // check if clock-source is supported
181 if (!perf_get_timestamp()) {
182 errs() << "kernel does not support CLOCK_MONOTONIC\n";
183 return;
184 }
185
186 if (!InitDebuggingDir()) {
187 errs() << "could not initialize debugging directory\n";
188 return;
189 }
190
191 std::string Filename;
192 raw_string_ostream FilenameBuf(Filename);
193 FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
194
195 // Need to open ourselves, because we need to hand the FD to OpenMarker() and
196 // raw_fd_ostream doesn't expose the FD.
197 using sys::fs::openFileForWrite;
198 if (auto EC =
199 openFileForReadWrite(FilenameBuf.str(), DumpFd,
200 sys::fs::CD_CreateNew, sys::fs::OF_None)) {
201 errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
202 << EC.message() << "\n";
203 return;
204 }
205
206 Dumpstream = std::make_unique<raw_fd_ostream>(DumpFd, true);
207
208 LLVMPerfJitHeader Header = {0, 0, 0, 0, 0, 0, 0, 0};
209 if (!FillMachine(Header))
210 return;
211
212 // signal this process emits JIT information
213 if (!OpenMarker())
214 return;
215
216 // emit dumpstream header
217 Header.Magic = LLVM_PERF_JIT_MAGIC;
218 Header.Version = LLVM_PERF_JIT_VERSION;
219 Header.TotalSize = sizeof(Header);
220 Header.Pid = Pid;
221 Header.Timestamp = perf_get_timestamp();
222 Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
223
224 // Everything initialized, can do profiling now.
225 if (!Dumpstream->has_error())
226 SuccessfullyInitialized = true;
227 }
228
notifyObjectLoaded(ObjectKey K,const ObjectFile & Obj,const RuntimeDyld::LoadedObjectInfo & L)229 void PerfJITEventListener::notifyObjectLoaded(
230 ObjectKey K, const ObjectFile &Obj,
231 const RuntimeDyld::LoadedObjectInfo &L) {
232
233 if (!SuccessfullyInitialized)
234 return;
235
236 OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
237 const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
238
239 // Get the address of the object image for use as a unique identifier
240 std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
241
242 // Use symbol info to iterate over functions in the object.
243 for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
244 SymbolRef Sym = P.first;
245 std::string SourceFileName;
246
247 Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
248 if (!SymTypeOrErr) {
249 // There's not much we can with errors here
250 consumeError(SymTypeOrErr.takeError());
251 continue;
252 }
253 SymbolRef::Type SymType = *SymTypeOrErr;
254 if (SymType != SymbolRef::ST_Function)
255 continue;
256
257 Expected<StringRef> Name = Sym.getName();
258 if (!Name) {
259 consumeError(Name.takeError());
260 continue;
261 }
262
263 Expected<uint64_t> AddrOrErr = Sym.getAddress();
264 if (!AddrOrErr) {
265 consumeError(AddrOrErr.takeError());
266 continue;
267 }
268 uint64_t Size = P.second;
269 object::SectionedAddress Address;
270 Address.Address = *AddrOrErr;
271
272 uint64_t SectionIndex = object::SectionedAddress::UndefSection;
273 if (auto SectOrErr = Sym.getSection())
274 if (*SectOrErr != Obj.section_end())
275 SectionIndex = SectOrErr.get()->getIndex();
276
277 // According to spec debugging info has to come before loading the
278 // corresonding code load.
279 DILineInfoTable Lines = Context->getLineInfoForAddressRange(
280 {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
281
282 NotifyDebug(*AddrOrErr, Lines);
283 NotifyCode(Name, *AddrOrErr, Size);
284 }
285
286 // avoid races with writes
287 std::lock_guard<sys::Mutex> Guard(Mutex);
288
289 Dumpstream->flush();
290 }
291
// Called when the object identified by K is about to be freed. Nothing is
// written to the dump file for this event.
void PerfJITEventListener::notifyFreeingObject(ObjectKey K) {
  // perf currently doesn't have an interface for unloading. But munmap()ing the
  // code section does, so that's ok.
}
296
InitDebuggingDir()297 bool PerfJITEventListener::InitDebuggingDir() {
298 time_t Time;
299 struct tm LocalTime;
300 char TimeBuffer[sizeof("YYYYMMDD")];
301 SmallString<64> Path;
302
303 // search for location to dump data to
304 if (const char *BaseDir = getenv("JITDUMPDIR"))
305 Path.append(BaseDir);
306 else if (!sys::path::home_directory(Path))
307 Path = ".";
308
309 // create debug directory
310 Path += "/.debug/jit/";
311 if (auto EC = sys::fs::create_directories(Path)) {
312 errs() << "could not create jit cache directory " << Path << ": "
313 << EC.message() << "\n";
314 return false;
315 }
316
317 // create unique directory for dump data related to this process
318 time(&Time);
319 localtime_r(&Time, &LocalTime);
320 strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
321 Path += JIT_LANG "-jit-";
322 Path += TimeBuffer;
323
324 SmallString<128> UniqueDebugDir;
325
326 using sys::fs::createUniqueDirectory;
327 if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
328 errs() << "could not create unique jit cache directory " << UniqueDebugDir
329 << ": " << EC.message() << "\n";
330 return false;
331 }
332
333 JitPath = std::string(UniqueDebugDir.str());
334
335 return true;
336 }
337
OpenMarker()338 bool PerfJITEventListener::OpenMarker() {
339 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
340 // is captured either live (perf record running when we mmap) or in deferred
341 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
342 // file for more meta data info about the jitted code. Perf report/annotate
343 // detect this special filename and process the jitdump file.
344 //
345 // Mapping must be PROT_EXEC to ensure it is captured by perf record
346 // even when not using -d option.
347 MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(),
348 PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0);
349
350 if (MarkerAddr == MAP_FAILED) {
351 errs() << "could not mmap JIT marker\n";
352 return false;
353 }
354 return true;
355 }
356
CloseMarker()357 void PerfJITEventListener::CloseMarker() {
358 if (!MarkerAddr)
359 return;
360
361 munmap(MarkerAddr, sys::Process::getPageSizeEstimate());
362 MarkerAddr = nullptr;
363 }
364
FillMachine(LLVMPerfJitHeader & hdr)365 bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
366 char id[16];
367 struct {
368 uint16_t e_type;
369 uint16_t e_machine;
370 } info;
371
372 size_t RequiredMemory = sizeof(id) + sizeof(info);
373
374 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
375 MemoryBuffer::getFileSlice("/proc/self/exe",
376 RequiredMemory,
377 0);
378
379 // This'll not guarantee that enough data was actually read from the
380 // underlying file. Instead the trailing part of the buffer would be
381 // zeroed. Given the ELF signature check below that seems ok though,
382 // it's unlikely that the file ends just after that, and the
383 // consequence would just be that perf wouldn't recognize the
384 // signature.
385 if (auto EC = MB.getError()) {
386 errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
387 return false;
388 }
389
390 memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
391 memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
392
393 // check ELF signature
394 if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
395 errs() << "invalid elf signature\n";
396 return false;
397 }
398
399 hdr.ElfMach = info.e_machine;
400
401 return true;
402 }
403
NotifyCode(Expected<llvm::StringRef> & Symbol,uint64_t CodeAddr,uint64_t CodeSize)404 void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
405 uint64_t CodeAddr, uint64_t CodeSize) {
406 assert(SuccessfullyInitialized);
407
408 // 0 length functions can't have samples.
409 if (CodeSize == 0)
410 return;
411
412 LLVMPerfJitRecordCodeLoad rec;
413 rec.Prefix.Id = JIT_CODE_LOAD;
414 rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
415 Symbol->size() + 1 + // symbol name
416 CodeSize; // and code
417 rec.Prefix.Timestamp = perf_get_timestamp();
418
419 rec.CodeSize = CodeSize;
420 rec.Vma = 0;
421 rec.CodeAddr = CodeAddr;
422 rec.Pid = Pid;
423 rec.Tid = get_threadid();
424
425 // avoid interspersing output
426 std::lock_guard<sys::Mutex> Guard(Mutex);
427
428 rec.CodeIndex = CodeGeneration++; // under lock!
429
430 Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
431 Dumpstream->write(Symbol->data(), Symbol->size() + 1);
432 Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
433 }
434
NotifyDebug(uint64_t CodeAddr,DILineInfoTable Lines)435 void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
436 DILineInfoTable Lines) {
437 assert(SuccessfullyInitialized);
438
439 // Didn't get useful debug info.
440 if (Lines.empty())
441 return;
442
443 LLVMPerfJitRecordDebugInfo rec;
444 rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
445 rec.Prefix.TotalSize = sizeof(rec); // will be increased further
446 rec.Prefix.Timestamp = perf_get_timestamp();
447 rec.CodeAddr = CodeAddr;
448 rec.NrEntry = Lines.size();
449
450 // compute total size size of record (variable due to filenames)
451 DILineInfoTable::iterator Begin = Lines.begin();
452 DILineInfoTable::iterator End = Lines.end();
453 for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
454 DILineInfo &line = It->second;
455 rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
456 rec.Prefix.TotalSize += line.FileName.size() + 1;
457 }
458
459 // The debug_entry describes the source line information. It is defined as
460 // follows in order:
461 // * uint64_t code_addr: address of function for which the debug information
462 // is generated
463 // * uint32_t line : source file line number (starting at 1)
464 // * uint32_t discrim : column discriminator, 0 is default
465 // * char name[n] : source file name in ASCII, including null termination
466
467 // avoid interspersing output
468 std::lock_guard<sys::Mutex> Guard(Mutex);
469
470 Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
471
472 for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
473 LLVMPerfJitDebugEntry LineInfo;
474 DILineInfo &Line = It->second;
475
476 LineInfo.Addr = It->first;
477 // The function re-created by perf is preceded by a elf
478 // header. Need to adjust for that, otherwise the results are
479 // wrong.
480 LineInfo.Addr += 0x40;
481 LineInfo.Lineno = Line.Line;
482 LineInfo.Discrim = Line.Discriminator;
483
484 Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
485 sizeof(LineInfo));
486 Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
487 }
488 }
489
490 } // end anonymous namespace
491
492 namespace llvm {
createPerfJITEventListener()493 JITEventListener *JITEventListener::createPerfJITEventListener() {
494 // There should be only a single event listener per process, otherwise perf
495 // gets confused.
496 static PerfJITEventListener PerfListener;
497 return &PerfListener;
498 }
499
500 } // namespace llvm
501
LLVMCreatePerfJITEventListener(void)502 LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void)
503 {
504 return wrap(JITEventListener::createPerfJITEventListener());
505 }
506