xref: /llvm-project/offload/plugins-nextgen/common/include/ErrorReporting.h (revision 3b7611594f010ecd5233ab9580b2feb88837f9ef)
1 //===- ErrorReporting.h - Helper to provide nice error messages ----- c++ -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #ifndef OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
12 #define OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
13 
14 #include "PluginInterface.h"
15 #include "Shared/EnvironmentVar.h"
16 
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Frontend/OpenMP/OMP.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/WithColor.h"
23 #include "llvm/Support/raw_ostream.h"
24 
25 #include <cstdint>
26 #include <cstdio>
27 #include <cstdlib>
28 #include <functional>
29 #include <optional>
30 #include <string>
31 #include <unistd.h>
32 
33 namespace llvm {
34 namespace omp {
35 namespace target {
36 namespace plugin {
37 
38 class ErrorReporter {
39 
40   enum ColorTy {
41     Yellow = int(HighlightColor::Address),
42     Green = int(HighlightColor::String),
43     DarkBlue = int(HighlightColor::Tag),
44     Cyan = int(HighlightColor::Attribute),
45     DarkPurple = int(HighlightColor::Enumerator),
46     DarkRed = int(HighlightColor::Macro),
47     BoldRed = int(HighlightColor::Error),
48     BoldLightPurple = int(HighlightColor::Warning),
49     BoldDarkGrey = int(HighlightColor::Note),
50     BoldLightBlue = int(HighlightColor::Remark),
51   };
52 
53   /// The banner printed at the beginning of an error report.
54   static constexpr auto ErrorBanner = "OFFLOAD ERROR: ";
55 
56   /// Return the device id as string, or n/a if not available.
57   static std::string getDeviceIdStr(GenericDeviceTy *Device) {
58     return Device ? std::to_string(Device->getDeviceId()) : "n/a";
59   }
60 
61   /// Return a nice name for an TargetAllocTy.
62   static StringRef getAllocTyName(TargetAllocTy Kind) {
63     switch (Kind) {
64     case TARGET_ALLOC_DEVICE_NON_BLOCKING:
65     case TARGET_ALLOC_DEFAULT:
66     case TARGET_ALLOC_DEVICE:
67       return "device memory";
68     case TARGET_ALLOC_HOST:
69       return "pinned host memory";
70     case TARGET_ALLOC_SHARED:
71       return "managed memory";
72       break;
73     }
74     llvm_unreachable("Unknown target alloc kind");
75   }
76 
77 #pragma clang diagnostic push
78 #pragma clang diagnostic ignored "-Wgcc-compat"
79 #pragma clang diagnostic ignored "-Wformat-security"
80   /// Print \p Format, instantiated with \p Args to stderr.
81   /// TODO: Allow redirection into a file stream.
82   template <typename... ArgsTy>
83   [[gnu::format(__printf__, 1, 2)]] static void print(const char *Format,
84                                                       ArgsTy &&...Args) {
85     raw_fd_ostream OS(STDERR_FILENO, false);
86     OS << llvm::format(Format, Args...);
87   }
88 
89   /// Print \p Format, instantiated with \p Args to stderr, but colored.
90   /// TODO: Allow redirection into a file stream.
91   template <typename... ArgsTy>
92   [[gnu::format(__printf__, 2, 3)]] static void
93   print(ColorTy Color, const char *Format, ArgsTy &&...Args) {
94     raw_fd_ostream OS(STDERR_FILENO, false);
95     WithColor(OS, HighlightColor(Color)) << llvm::format(Format, Args...);
96   }
97 
98   /// Print \p Format, instantiated with \p Args to stderr, but colored and with
99   /// a banner.
100   /// TODO: Allow redirection into a file stream.
101   template <typename... ArgsTy>
102   [[gnu::format(__printf__, 1, 2)]] static void reportError(const char *Format,
103                                                             ArgsTy &&...Args) {
104     print(BoldRed, "%s", ErrorBanner);
105     print(BoldRed, Format, Args...);
106     print("\n");
107   }
108 #pragma clang diagnostic pop
109 
110   static void reportError(const char *Str) { reportError("%s", Str); }
111   static void print(const char *Str) { print("%s", Str); }
112   static void print(StringRef Str) { print("%s", Str.str().c_str()); }
113   static void print(ColorTy Color, const char *Str) { print(Color, "%s", Str); }
114   static void print(ColorTy Color, StringRef Str) {
115     print(Color, "%s", Str.str().c_str());
116   }
117 
118   /// Pretty print a stack trace.
119   static void reportStackTrace(StringRef StackTrace) {
120     if (StackTrace.empty())
121       return;
122 
123     SmallVector<StringRef> Lines, Parts;
124     StackTrace.split(Lines, "\n", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
125     int Start = Lines.empty() || !Lines[0].contains("PrintStackTrace") ? 0 : 1;
126     unsigned NumDigits =
127         (int)(floor(log10(Lines.size() - Start - /*0*/ 1)) + 1);
128     for (int I = Start, E = Lines.size(); I < E; ++I) {
129       auto Line = Lines[I];
130       Parts.clear();
131       Line = Line.drop_while([](char C) { return std::isspace(C); });
132       Line.split(Parts, " ", /*MaxSplit=*/2);
133       if (Parts.size() != 3 || Parts[0].size() < 2 || Parts[0][0] != '#') {
134         print("%s\n", Line.str().c_str());
135         continue;
136       }
137       unsigned FrameIdx = std::stoi(Parts[0].drop_front(1).str());
138       if (Start)
139         FrameIdx -= 1;
140       print(DarkPurple, "    %s", Parts[0].take_front().str().c_str());
141       print(Green, "%*u", NumDigits, FrameIdx);
142       print(BoldLightBlue, " %s", Parts[1].str().c_str());
143       print(" %s\n", Parts[2].str().c_str());
144     }
145     print("\n");
146   }
147 
148   /// Report information about an allocation associated with \p ATI.
149   static void reportAllocationInfo(AllocationTraceInfoTy *ATI) {
150     if (!ATI)
151       return;
152 
153     if (!ATI->DeallocationTrace.empty()) {
154       print(BoldLightPurple, "Last deallocation:\n");
155       reportStackTrace(ATI->DeallocationTrace);
156     }
157 
158     if (ATI->HostPtr)
159       print(BoldLightPurple,
160             "Last allocation of size %lu for host pointer %p -> device pointer "
161             "%p:\n",
162             ATI->Size, ATI->HostPtr, ATI->DevicePtr);
163     else
164       print(BoldLightPurple,
165             "Last allocation of size %lu -> device pointer %p:\n", ATI->Size,
166             ATI->DevicePtr);
167     reportStackTrace(ATI->AllocationTrace);
168     if (!ATI->LastAllocationInfo)
169       return;
170 
171     unsigned I = 0;
172     print(BoldLightPurple, "Prior allocations with the same base pointer:");
173     while (ATI->LastAllocationInfo) {
174       print("\n");
175       ATI = ATI->LastAllocationInfo;
176       print(BoldLightPurple, " #%u Prior deallocation of size %lu:\n", I,
177             ATI->Size);
178       reportStackTrace(ATI->DeallocationTrace);
179       if (ATI->HostPtr)
180         print(
181             BoldLightPurple,
182             " #%u Prior allocation for host pointer %p -> device pointer %p:\n",
183             I, ATI->HostPtr, ATI->DevicePtr);
184       else
185         print(BoldLightPurple, " #%u Prior allocation -> device pointer %p:\n",
186               I, ATI->DevicePtr);
187       reportStackTrace(ATI->AllocationTrace);
188       ++I;
189     }
190   }
191 
192   /// End the execution of the program.
193   static void abortExecution() { abort(); }
194 
195 public:
196 #define DEALLOCATION_ERROR(Format, ...)                                        \
197   reportError(Format, __VA_ARGS__);                                            \
198   reportStackTrace(StackTrace);                                                \
199   reportAllocationInfo(ATI);                                                   \
200   abortExecution();
201 
202   static void reportDeallocationOfNonAllocatedPtr(void *DevicePtr,
203                                                   TargetAllocTy Kind,
204                                                   AllocationTraceInfoTy *ATI,
205                                                   std::string &StackTrace) {
206     DEALLOCATION_ERROR("deallocation of non-allocated %s: %p",
207                        getAllocTyName(Kind).data(), DevicePtr);
208   }
209 
210   static void reportDeallocationOfDeallocatedPtr(void *DevicePtr,
211                                                  TargetAllocTy Kind,
212                                                  AllocationTraceInfoTy *ATI,
213                                                  std::string &StackTrace) {
214     DEALLOCATION_ERROR("double-free of %s: %p", getAllocTyName(Kind).data(),
215                        DevicePtr);
216   }
217 
218   static void reportDeallocationOfWrongPtrKind(void *DevicePtr,
219                                                TargetAllocTy Kind,
220                                                AllocationTraceInfoTy *ATI,
221                                                std::string &StackTrace) {
222     DEALLOCATION_ERROR("deallocation requires %s but allocation was %s: %p",
223                        getAllocTyName(Kind).data(),
224                        getAllocTyName(ATI->Kind).data(), DevicePtr);
225 #undef DEALLOCATION_ERROR
226   }
227 
228   static void reportMemoryAccessError(GenericDeviceTy &Device, void *DevicePtr,
229                                       std::string &ErrorStr, bool Abort) {
230     reportError(ErrorStr.c_str());
231 
232     if (!Device.OMPX_TrackAllocationTraces) {
233       print(Yellow, "Use '%s=true' to track device allocations\n",
234             Device.OMPX_TrackAllocationTraces.getName().data());
235       if (Abort)
236         abortExecution();
237       return;
238     }
239     uintptr_t Distance = false;
240     auto *ATI =
241         Device.getClosestAllocationTraceInfoForAddr(DevicePtr, Distance);
242     if (!ATI) {
243       print(Cyan,
244             "No host-issued allocations; device pointer %p might be "
245             "a global, stack, or shared location\n",
246             DevicePtr);
247       if (Abort)
248         abortExecution();
249       return;
250     }
251     if (!Distance) {
252       print(Cyan, "Device pointer %p points into%s host-issued allocation:\n",
253             DevicePtr, ATI->DeallocationTrace.empty() ? "" : " prior");
254       reportAllocationInfo(ATI);
255       if (Abort)
256         abortExecution();
257       return;
258     }
259 
260     bool IsClose = Distance < (1L << 29L /*512MB=*/);
261     print(Cyan,
262           "Device pointer %p does not point into any (current or prior) "
263           "host-issued allocation%s.\n",
264           DevicePtr,
265           IsClose ? "" : " (might be a global, stack, or shared location)");
266     if (IsClose) {
267       print(Cyan,
268             "Closest host-issued allocation (distance %" PRIuPTR
269             " byte%s; might be by page):\n",
270             Distance, Distance > 1 ? "s" : "");
271       reportAllocationInfo(ATI);
272     }
273     if (Abort)
274       abortExecution();
275   }
276 
277   /// Report that a kernel encountered a trap instruction.
278   static void reportTrapInKernel(
279       GenericDeviceTy &Device, KernelTraceInfoRecordTy &KTIR,
280       std::function<bool(__tgt_async_info &)> AsyncInfoWrapperMatcher) {
281     assert(AsyncInfoWrapperMatcher && "A matcher is required");
282 
283     uint32_t Idx = 0;
284     for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) {
285       auto KTI = KTIR.getKernelTraceInfo(I);
286       if (KTI.Kernel == nullptr)
287         break;
288       // Skip kernels issued in other queues.
289       if (KTI.AsyncInfo && !(AsyncInfoWrapperMatcher(*KTI.AsyncInfo)))
290         continue;
291       Idx = I;
292       break;
293     }
294 
295     auto KTI = KTIR.getKernelTraceInfo(Idx);
296     if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) {
297       auto PrettyKernelName =
298           llvm::omp::prettifyFunctionName(KTI.Kernel->getName());
299       reportError("Kernel '%s'", PrettyKernelName.c_str());
300     }
301     reportError("execution interrupted by hardware trap instruction");
302     if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) {
303       if (!KTI.LaunchTrace.empty())
304         reportStackTrace(KTI.LaunchTrace);
305       else
306         print(Yellow, "Use '%s=1' to show the stack trace of the kernel\n",
307               Device.OMPX_TrackNumKernelLaunches.getName().data());
308     }
309     abort();
310   }
311 
312   /// Report the kernel traces taken from \p KTIR, up to
313   /// OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES many.
314   static void reportKernelTraces(GenericDeviceTy &Device,
315                                  KernelTraceInfoRecordTy &KTIR) {
316     uint32_t NumKTIs = 0;
317     for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) {
318       auto KTI = KTIR.getKernelTraceInfo(I);
319       if (KTI.Kernel == nullptr)
320         break;
321       ++NumKTIs;
322     }
323     if (NumKTIs == 0) {
324       print(BoldRed, "No kernel launches known\n");
325       return;
326     }
327 
328     uint32_t TracesToShow =
329         std::min(Device.OMPX_TrackNumKernelLaunches.get(), NumKTIs);
330     if (TracesToShow == 0) {
331       if (NumKTIs == 1)
332         print(BoldLightPurple, "Display only launched kernel:\n");
333       else
334         print(BoldLightPurple, "Display last %u kernels launched:\n", NumKTIs);
335     } else {
336       if (NumKTIs == 1)
337         print(BoldLightPurple, "Display kernel launch trace:\n");
338       else
339         print(BoldLightPurple,
340               "Display %u of the %u last kernel launch traces:\n", TracesToShow,
341               NumKTIs);
342     }
343 
344     for (uint32_t Idx = 0, I = 0; I < NumKTIs; ++Idx) {
345       auto KTI = KTIR.getKernelTraceInfo(Idx);
346       auto PrettyKernelName =
347           llvm::omp::prettifyFunctionName(KTI.Kernel->getName());
348       if (NumKTIs == 1)
349         print(BoldLightPurple, "Kernel '%s'\n", PrettyKernelName.c_str());
350       else
351         print(BoldLightPurple, "Kernel %d: '%s'\n", I,
352               PrettyKernelName.c_str());
353       reportStackTrace(KTI.LaunchTrace);
354       ++I;
355     }
356 
357     if (NumKTIs != 1) {
358       print(Yellow,
359             "Use '%s=<num>' to adjust the number of shown stack traces (%u "
360             "now, up to %zu)\n",
361             Device.OMPX_TrackNumKernelLaunches.getName().data(),
362             Device.OMPX_TrackNumKernelLaunches.get(), KTIR.size());
363     }
364     // TODO: Let users know how to serialize kernels
365   }
366 };
367 
368 } // namespace plugin
369 } // namespace target
370 } // namespace omp
371 } // namespace llvm
372 
373 #endif // OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
374