1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Register objects for access by profilers via the perf JIT interface.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
14
15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
16
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/Process.h"
21 #include "llvm/Support/Threading.h"
22
23 #include <mutex>
24 #include <optional>
25
26 #ifdef __linux__
27
28 #include <sys/mman.h> // mmap()
29 #include <time.h> // clock_gettime(), time(), localtime_r() */
30 #include <unistd.h> // for read(), close()
31
// Category name for the LLVM_DEBUG output produced in this file.
#define DEBUG_TYPE "orc"

// language identifier (XXX: should we generate something better from debug
// info?)
// It is used as the prefix of the per-process dump directory name.
#define JIT_LANG "llvm-IR"
// Magic value stored in the dump file header: the characters 'J', 'i', 'T',
// 'D' packed into 32 bits with 'J' in the most significant byte.
#define LLVM_PERF_JIT_MAGIC                                                    \
  ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
   (uint32_t)'D')
// Version value stored in the dump file header.
#define LLVM_PERF_JIT_VERSION 1
41
42 using namespace llvm;
43 using namespace llvm::orc;
44
45 struct PerfState {
46 // cache lookups
47 uint32_t Pid;
48
49 // base directory for output data
50 std::string JitPath;
51
52 // output data stream, closed via Dumpstream
53 int DumpFd = -1;
54
55 // output data stream
56 std::unique_ptr<raw_fd_ostream> Dumpstream;
57
58 // perf mmap marker
59 void *MarkerAddr = NULL;
60 };
61
// prevent concurrent dumps from messing up the output file
static std::mutex Mutex;
// The active profiling session. It is assigned by
// registerJITLoaderPerfStartImpl() and reset by
// registerJITLoaderPerfEndImpl().
static std::optional<PerfState> State;
65
/// Common prefix of every record written to the dump stream.
struct RecHeader {
  uint32_t Id;        // record type, stored as its numeric value
  uint32_t TotalSize; // record size as reported by the producer of the record
  uint64_t Timestamp; // perf_get_timestamp() value taken when writing
};
// The struct is written to the dump file byte-for-byte, so its size must not
// be changed by padding.
static_assert(sizeof(RecHeader) == 16, "unexpected padding in RecHeader");
71
72 struct DIR {
73 RecHeader Prefix;
74 uint64_t CodeAddr;
75 uint64_t NrEntry;
76 };
77
/// Fixed-size part of one debug entry. writeDebugRecord() writes it and then
/// appends the entry's name.
struct DIE {
  uint64_t CodeAddr;
  uint32_t Line;
  uint32_t Discrim;
};
// The struct is written to the dump file byte-for-byte, so its size must not
// be changed by padding.
static_assert(sizeof(DIE) == 16, "unexpected padding in DIE");
83
84 struct CLR {
85 RecHeader Prefix;
86 uint32_t Pid;
87 uint32_t Tid;
88 uint64_t Vma;
89 uint64_t CodeAddr;
90 uint64_t CodeSize;
91 uint64_t CodeIndex;
92 };
93
94 struct UWR {
95 RecHeader Prefix;
96 uint64_t UnwindDataSize;
97 uint64_t EhFrameHeaderSize;
98 uint64_t MappedSize;
99 };
100
/// Converts the time stored in \p TS to a count of nanoseconds.
static inline uint64_t timespec_to_ns(const struct timespec *TS) {
  constexpr uint64_t NsPerSecond = 1000000000;
  const uint64_t WholeSeconds = static_cast<uint64_t>(TS->tv_sec);
  return WholeSeconds * NsPerSecond + TS->tv_nsec;
}
105
perf_get_timestamp()106 static inline uint64_t perf_get_timestamp() {
107 timespec TS;
108 if (clock_gettime(CLOCK_MONOTONIC, &TS))
109 return 0;
110
111 return timespec_to_ns(&TS);
112 }
113
writeDebugRecord(const PerfJITDebugInfoRecord & DebugRecord)114 static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
115 assert(State && "PerfState not initialized");
116 LLVM_DEBUG(dbgs() << "Writing debug record with "
117 << DebugRecord.Entries.size() << " entries\n");
118 [[maybe_unused]] size_t Written = 0;
119 DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
120 DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
121 DebugRecord.CodeAddr, DebugRecord.Entries.size()};
122 State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));
123 Written += sizeof(Dir);
124 for (auto &Die : DebugRecord.Entries) {
125 DIE d{Die.Addr, Die.Lineno, Die.Discrim};
126 State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d));
127 State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1);
128 Written += sizeof(d) + Die.Name.size() + 1;
129 }
130 LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
131 }
132
writeCodeRecord(const PerfJITCodeLoadRecord & CodeRecord)133 static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
134 assert(State && "PerfState not initialized");
135 uint32_t Tid = get_threadid();
136 LLVM_DEBUG(dbgs() << "Writing code record with code size "
137 << CodeRecord.CodeSize << " and code index "
138 << CodeRecord.CodeIndex << "\n");
139 CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
140 CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
141 State->Pid,
142 Tid,
143 CodeRecord.Vma,
144 CodeRecord.CodeAddr,
145 CodeRecord.CodeSize,
146 CodeRecord.CodeIndex};
147 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, "
148 << CodeRecord.Name.size() + 1 << " bytes of name, "
149 << CodeRecord.CodeSize << " bytes of code\n");
150 State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr));
151 State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
152 State->Dumpstream->write((const char *)CodeRecord.CodeAddr,
153 CodeRecord.CodeSize);
154 }
155
156 static void
writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord & UnwindRecord)157 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
158 assert(State && "PerfState not initialized");
159 dbgs() << "Writing unwind record with unwind data size "
160 << UnwindRecord.UnwindDataSize << " and EH frame header size "
161 << UnwindRecord.EHFrameHdrSize << " and mapped size "
162 << UnwindRecord.MappedSize << "\n";
163 UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
164 UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
165 UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
166 UnwindRecord.MappedSize};
167 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
168 << UnwindRecord.EHFrameHdrSize
169 << " bytes of EH frame header, "
170 << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
171 << " bytes of EH frame\n");
172 State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr));
173 if (UnwindRecord.EHFrameHdrAddr)
174 State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr,
175 UnwindRecord.EHFrameHdrSize);
176 else
177 State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),
178 UnwindRecord.EHFrameHdrSize);
179 State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr,
180 UnwindRecord.UnwindDataSize -
181 UnwindRecord.EHFrameHdrSize);
182 }
183
registerJITLoaderPerfImpl(const PerfJITRecordBatch & Batch)184 static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
185 if (!State)
186 return make_error<StringError>("PerfState not initialized",
187 inconvertibleErrorCode());
188
189 // Serialize the batch
190 std::lock_guard<std::mutex> Lock(Mutex);
191 if (Batch.UnwindingRecord.Prefix.TotalSize > 0)
192 writeUnwindRecord(Batch.UnwindingRecord);
193
194 for (const auto &DebugInfo : Batch.DebugInfoRecords)
195 writeDebugRecord(DebugInfo);
196
197 for (const auto &CodeLoad : Batch.CodeLoadRecords)
198 writeCodeRecord(CodeLoad);
199
200 State->Dumpstream->flush();
201
202 return Error::success();
203 }
204
/// Header written once at the start of the dump file.
///
/// Every field defaults to zero so that fields a producer does not assign
/// (for example the reserved Pad1) never hold indeterminate values when the
/// struct is written out.
struct Header {
  uint32_t Magic = 0;     // characters "JiTD"
  uint32_t Version = 0;   // header version
  uint32_t TotalSize = 0; // total size of header
  uint32_t ElfMach = 0;   // elf mach target
  uint32_t Pad1 = 0;      // reserved
  uint32_t Pid = 0;
  uint64_t Timestamp = 0; // timestamp
  uint64_t Flags = 0;     // flags
};
// The struct is written to the dump file byte-for-byte, so its size must not
// be changed by padding.
static_assert(sizeof(Header) == 40, "unexpected padding in Header");
215
OpenMarker(PerfState & State)216 static Error OpenMarker(PerfState &State) {
217 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
218 // is captured either live (perf record running when we mmap) or in deferred
219 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
220 // file for more meta data info about the jitted code. Perf report/annotate
221 // detect this special filename and process the jitdump file.
222 //
223 // Mapping must be PROT_EXEC to ensure it is captured by perf record
224 // even when not using -d option.
225 State.MarkerAddr =
226 ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,
227 MAP_PRIVATE, State.DumpFd, 0);
228
229 if (State.MarkerAddr == MAP_FAILED)
230 return make_error<llvm::StringError>("could not mmap JIT marker",
231 inconvertibleErrorCode());
232
233 return Error::success();
234 }
235
CloseMarker(PerfState & State)236 void CloseMarker(PerfState &State) {
237 if (!State.MarkerAddr)
238 return;
239
240 munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate());
241 State.MarkerAddr = nullptr;
242 }
243
FillMachine(PerfState & State)244 static Expected<Header> FillMachine(PerfState &State) {
245 Header Hdr;
246 Hdr.Magic = LLVM_PERF_JIT_MAGIC;
247 Hdr.Version = LLVM_PERF_JIT_VERSION;
248 Hdr.TotalSize = sizeof(Hdr);
249 Hdr.Pid = State.Pid;
250 Hdr.Timestamp = perf_get_timestamp();
251
252 char Id[16];
253 struct {
254 uint16_t e_type;
255 uint16_t e_machine;
256 } Info;
257
258 size_t RequiredMemory = sizeof(Id) + sizeof(Info);
259
260 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
261 MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
262
263 // This'll not guarantee that enough data was actually read from the
264 // underlying file. Instead the trailing part of the buffer would be
265 // zeroed. Given the ELF signature check below that seems ok though,
266 // it's unlikely that the file ends just after that, and the
267 // consequence would just be that perf wouldn't recognize the
268 // signature.
269 if (!MB)
270 return make_error<llvm::StringError>("could not open /proc/self/exe",
271 MB.getError());
272
273 memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id));
274 memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info));
275
276 // check ELF signature
277 if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')
278 return make_error<llvm::StringError>("invalid ELF signature",
279 inconvertibleErrorCode());
280
281 Hdr.ElfMach = Info.e_machine;
282
283 return Hdr;
284 }
285
InitDebuggingDir(PerfState & State)286 static Error InitDebuggingDir(PerfState &State) {
287 time_t Time;
288 struct tm LocalTime;
289 char TimeBuffer[sizeof("YYYYMMDD")];
290 SmallString<64> Path;
291
292 // search for location to dump data to
293 if (const char *BaseDir = getenv("JITDUMPDIR"))
294 Path.append(BaseDir);
295 else if (!sys::path::home_directory(Path))
296 Path = ".";
297
298 // create debug directory
299 Path += "/.debug/jit/";
300 if (auto EC = sys::fs::create_directories(Path)) {
301 std::string ErrStr;
302 raw_string_ostream ErrStream(ErrStr);
303 ErrStream << "could not create jit cache directory " << Path << ": "
304 << EC.message() << "\n";
305 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
306 }
307
308 // create unique directory for dump data related to this process
309 time(&Time);
310 localtime_r(&Time, &LocalTime);
311 strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
312 Path += JIT_LANG "-jit-";
313 Path += TimeBuffer;
314
315 SmallString<128> UniqueDebugDir;
316
317 using sys::fs::createUniqueDirectory;
318 if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
319 std::string ErrStr;
320 raw_string_ostream ErrStream(ErrStr);
321 ErrStream << "could not create unique jit cache directory "
322 << UniqueDebugDir << ": " << EC.message() << "\n";
323 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
324 }
325
326 State.JitPath = std::string(UniqueDebugDir);
327
328 return Error::success();
329 }
330
registerJITLoaderPerfStartImpl()331 static Error registerJITLoaderPerfStartImpl() {
332 PerfState Tentative;
333 Tentative.Pid = sys::Process::getProcessId();
334 // check if clock-source is supported
335 if (!perf_get_timestamp())
336 return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
337 inconvertibleErrorCode());
338
339 if (auto Err = InitDebuggingDir(Tentative))
340 return Err;
341
342 std::string Filename;
343 raw_string_ostream FilenameBuf(Filename);
344 FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";
345
346 // Need to open ourselves, because we need to hand the FD to OpenMarker() and
347 // raw_fd_ostream doesn't expose the FD.
348 using sys::fs::openFileForWrite;
349 if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd,
350 sys::fs::CD_CreateNew, sys::fs::OF_None)) {
351 std::string ErrStr;
352 raw_string_ostream ErrStream(ErrStr);
353 ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": "
354 << EC.message() << "\n";
355 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
356 }
357
358 Tentative.Dumpstream =
359 std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true);
360
361 auto Header = FillMachine(Tentative);
362 if (!Header)
363 return Header.takeError();
364
365 // signal this process emits JIT information
366 if (auto Err = OpenMarker(Tentative))
367 return Err;
368
369 Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()),
370 sizeof(*Header));
371
372 // Everything initialized, can do profiling now.
373 if (Tentative.Dumpstream->has_error())
374 return make_error<StringError>("could not write JIT dump header",
375 inconvertibleErrorCode());
376
377 State = std::move(Tentative);
378 return Error::success();
379 }
380
registerJITLoaderPerfEndImpl()381 static Error registerJITLoaderPerfEndImpl() {
382 if (!State)
383 return make_error<StringError>("PerfState not initialized",
384 inconvertibleErrorCode());
385
386 RecHeader Close;
387 Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
388 Close.TotalSize = sizeof(Close);
389 Close.Timestamp = perf_get_timestamp();
390 State->Dumpstream->write(reinterpret_cast<const char *>(&Close),
391 sizeof(Close));
392 if (State->MarkerAddr)
393 CloseMarker(*State);
394
395 State.reset();
396 return Error::success();
397 }
398
399 extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderPerfImpl(const char * Data,uint64_t Size)400 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
401 using namespace orc::shared;
402 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
403 Data, Size, registerJITLoaderPerfImpl)
404 .release();
405 }
406
407 extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderPerfStart(const char * Data,uint64_t Size)408 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
409 using namespace orc::shared;
410 return WrapperFunction<SPSError()>::handle(Data, Size,
411 registerJITLoaderPerfStartImpl)
412 .release();
413 }
414
415 extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderPerfEnd(const char * Data,uint64_t Size)416 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
417 using namespace orc::shared;
418 return WrapperFunction<SPSError()>::handle(Data, Size,
419 registerJITLoaderPerfEndImpl)
420 .release();
421 }
422
423 #else
424
425 using namespace llvm;
426 using namespace llvm::orc;
427
badOS()428 static Error badOS() {
429 using namespace llvm;
430 return llvm::make_error<StringError>(
431 "unsupported OS (perf support is only available on linux!)",
432 inconvertibleErrorCode());
433 }
434
badOSBatch(PerfJITRecordBatch & Batch)435 static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }
436
437 extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderPerfImpl(const char * Data,uint64_t Size)438 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
439 using namespace shared;
440 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,
441 badOSBatch)
442 .release();
443 }
444
445 extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderPerfStart(const char * Data,uint64_t Size)446 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
447 using namespace shared;
448 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
449 }
450
451 extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderPerfEnd(const char * Data,uint64_t Size)452 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
453 using namespace shared;
454 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
455 }
456
457 #endif
458