xref: /llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp (revision ef9aa34f0274cdbfa82c47f8ab99f02679fd0d13)
1 //===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Support/CrashRecoveryContext.h"
10 #include "llvm/Config/llvm-config.h"
11 #include "llvm/Support/ErrorHandling.h"
12 #include "llvm/Support/ExitCodes.h"
13 #include "llvm/Support/Signals.h"
14 #include "llvm/Support/ThreadLocal.h"
15 #include "llvm/Support/thread.h"
16 #include <mutex>
17 #include <setjmp.h>
18 
19 using namespace llvm;
20 
21 namespace {
22 
23 struct CrashRecoveryContextImpl;
24 
25 sys::ThreadLocal<const CrashRecoveryContextImpl> &getCurrentContext() {
26   static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
27   return CurrentContext;
28 }
29 
30 struct CrashRecoveryContextImpl {
31   // When threads are disabled, this links up all active
32   // CrashRecoveryContextImpls.  When threads are enabled there's one thread
33   // per CrashRecoveryContext and CurrentContext is a thread-local, so only one
34   // CrashRecoveryContextImpl is active per thread and this is always null.
35   const CrashRecoveryContextImpl *Next;
36 
37   CrashRecoveryContext *CRC;
38   ::jmp_buf JumpBuffer;
39   volatile unsigned Failed : 1;
40   unsigned SwitchedThread : 1;
41   unsigned ValidJumpBuffer : 1;
42 
43 public:
44   CrashRecoveryContextImpl(CrashRecoveryContext *CRC) noexcept
45       : CRC(CRC), Failed(false), SwitchedThread(false), ValidJumpBuffer(false) {
46     Next = getCurrentContext().get();
47     getCurrentContext().set(this);
48   }
49   ~CrashRecoveryContextImpl() {
50     if (!SwitchedThread)
51       getCurrentContext().set(Next);
52   }
53 
54   /// Called when the separate crash-recovery thread was finished, to
55   /// indicate that we don't need to clear the thread-local CurrentContext.
56   void setSwitchedThread() {
57 #if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
58     SwitchedThread = true;
59 #endif
60   }
61 
62   // If the function ran by the CrashRecoveryContext crashes or fails, then
63   // 'RetCode' represents the returned error code, as if it was returned by a
64   // process. 'Context' represents the signal type on Unix; on Windows, it is
65   // the ExceptionContext.
66   void HandleCrash(int RetCode, uintptr_t Context) {
67     // Eliminate the current context entry, to avoid re-entering in case the
68     // cleanup code crashes.
69     getCurrentContext().set(Next);
70 
71     assert(!Failed && "Crash recovery context already failed!");
72     Failed = true;
73 
74     if (CRC->DumpStackAndCleanupOnFailure)
75       sys::CleanupOnSignal(Context);
76 
77     CRC->RetCode = RetCode;
78 
79     // Jump back to the RunSafely we were called under.
80     if (ValidJumpBuffer)
81       longjmp(JumpBuffer, 1);
82 
83     // Otherwise let the caller decide of the outcome of the crash. Currently
84     // this occurs when using SEH on Windows with MSVC or clang-cl.
85   }
86 };
87 
/// Returns the single mutex guarding the global enabled flag and the
/// installation/removal of the handlers in Enable() and Disable().
std::mutex &getCrashRecoveryContextMutex() {
  static std::mutex Mutex;
  return Mutex;
}
92 
// Whether crash recovery is currently switched on; flipped by
// CrashRecoveryContext::Enable() and CrashRecoveryContext::Disable(). The
// enclosing anonymous namespace already gives this internal linkage.
bool gCrashRecoveryEnabled = false;
94 
95 sys::ThreadLocal<const CrashRecoveryContext> &getIsRecoveringFromCrash() {
96   static sys::ThreadLocal<const CrashRecoveryContext> IsRecoveringFromCrash;
97   return IsRecoveringFromCrash;
98 }
99 
100 } // namespace
101 
102 static void installExceptionOrSignalHandlers();
103 static void uninstallExceptionOrSignalHandlers();
104 
// Out-of-line definition of the defaulted destructor.
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() = default;
106 
/// Creates a recovery context. The only work done here is the call to
/// sys::DisableSystemDialogsOnCrash() explained below.
CrashRecoveryContext::CrashRecoveryContext() {
  // On Windows, if abort() was previously triggered (and caught by a previous
  // CrashRecoveryContext) the Windows CRT removes our installed signal handler,
  // so we need to install it again.
  sys::DisableSystemDialogsOnCrash();
}
113 
114 CrashRecoveryContext::~CrashRecoveryContext() {
115   // Reclaim registered resources.
116   CrashRecoveryContextCleanup *i = head;
117   const CrashRecoveryContext *PC = getIsRecoveringFromCrash().get();
118   getIsRecoveringFromCrash().set(this);
119   while (i) {
120     CrashRecoveryContextCleanup *tmp = i;
121     i = tmp->next;
122     tmp->cleanupFired = true;
123     tmp->recoverResources();
124     delete tmp;
125   }
126   getIsRecoveringFromCrash().set(PC);
127 
128   CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
129   delete CRCI;
130 }
131 
132 bool CrashRecoveryContext::isRecoveringFromCrash() {
133   return getIsRecoveringFromCrash().get() != nullptr;
134 }
135 
136 CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
137   if (!gCrashRecoveryEnabled)
138     return nullptr;
139 
140   const CrashRecoveryContextImpl *CRCI = getCurrentContext().get();
141   if (!CRCI)
142     return nullptr;
143 
144   return CRCI->CRC;
145 }
146 
147 void CrashRecoveryContext::Enable() {
148   std::lock_guard<std::mutex> L(getCrashRecoveryContextMutex());
149   // FIXME: Shouldn't this be a refcount or something?
150   if (gCrashRecoveryEnabled)
151     return;
152   gCrashRecoveryEnabled = true;
153   installExceptionOrSignalHandlers();
154 }
155 
156 void CrashRecoveryContext::Disable() {
157   std::lock_guard<std::mutex> L(getCrashRecoveryContextMutex());
158   if (!gCrashRecoveryEnabled)
159     return;
160   gCrashRecoveryEnabled = false;
161   uninstallExceptionOrSignalHandlers();
162 }
163 
164 void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
165 {
166   if (!cleanup)
167     return;
168   if (head)
169     head->prev = cleanup;
170   cleanup->next = head;
171   head = cleanup;
172 }
173 
174 void
175 CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
176   if (!cleanup)
177     return;
178   if (cleanup == head) {
179     head = cleanup->next;
180     if (head)
181       head->prev = nullptr;
182   }
183   else {
184     cleanup->prev->next = cleanup->next;
185     if (cleanup->next)
186       cleanup->next->prev = cleanup->prev;
187   }
188   delete cleanup;
189 }
190 
191 #if defined(_MSC_VER)
192 
193 #include <windows.h> // for GetExceptionInformation
194 
195 // If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way
196 // better than VEH. Vectored exception handling catches all exceptions happening
197 // on the thread with installed exception handlers, so it can interfere with
198 // internal exception handling of other libraries on that thread. SEH works
199 // exactly as you would expect normal exception handling to work: it only
200 // catches exceptions if they would bubble out from the stack frame with __try /
201 // __except.
202 
// With SEH there is nothing to install or remove: in this configuration
// RunSafely wraps each call in its own __try / __except, so both hooks are
// intentionally empty.
static void installExceptionOrSignalHandlers() {}
static void uninstallExceptionOrSignalHandlers() {}
205 
206 // We need this function because the call to GetExceptionInformation() can only
207 // occur inside the __except evaluation block
208 static int ExceptionFilter(_EXCEPTION_POINTERS *Except) {
209   // Lookup the current thread local recovery object.
210   const CrashRecoveryContextImpl *CRCI = getCurrentContext().get();
211 
212   if (!CRCI) {
213     // Something has gone horribly wrong, so let's just tell everyone
214     // to keep searching
215     CrashRecoveryContext::Disable();
216     return EXCEPTION_CONTINUE_SEARCH;
217   }
218 
219   int RetCode = (int)Except->ExceptionRecord->ExceptionCode;
220   if ((RetCode & 0xF0000000) == 0xE0000000)
221     RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit
222 
223   // Handle the crash
224   const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
225       RetCode, reinterpret_cast<uintptr_t>(Except));
226 
227   return EXCEPTION_EXECUTE_HANDLER;
228 }
229 
/// Runs \p Fn, guarded by structured exception handling when crash recovery
/// is enabled.
///
/// \returns true if \p Fn returned normally (or recovery is disabled), false
/// if an exception was caught and handled by ExceptionFilter.
#if defined(__clang__) && defined(_M_IX86)
// Work around PR44697.
__attribute__((optnone))
#endif
bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
  // With recovery disabled, call Fn unprotected and report success.
  if (!gCrashRecoveryEnabled) {
    Fn();
    return true;
  }
  assert(!Impl && "Crash recovery context already initialized!");
  // Constructing the impl also registers it as this thread's current context,
  // which is what ExceptionFilter looks up.
  Impl = new CrashRecoveryContextImpl(this);
  __try {
    Fn();
  } __except (ExceptionFilter(GetExceptionInformation())) {
    // Reached when the filter answered EXCEPTION_EXECUTE_HANDLER.
    return false;
  }
  return true;
}
248 
249 #else // !_MSC_VER
250 
251 #if defined(_WIN32)
252 // This is a non-MSVC compiler, probably mingw gcc or clang without
253 // -fms-extensions. Use vectored exception handling (VEH).
254 //
255 // On Windows, we can make use of vectored exception handling to catch most
256 // crashing situations.  Note that this does mean we will be alerted of
257 // exceptions *before* structured exception handling has the opportunity to
258 // catch it. Unfortunately, this causes problems in practice with other code
259 // running on threads with LLVM crash recovery contexts, so we would like to
260 // eventually move away from VEH.
261 //
262 // Vectored works on a per-thread basis, which is an advantage over
263 // SetUnhandledExceptionFilter. SetUnhandledExceptionFilter also doesn't have
264 // any native support for chaining exception handlers, but VEH allows more than
265 // one.
266 //
267 // The vectored exception handler functionality was added in Windows
268 // XP, so if support for older versions of Windows is required,
269 // it will have to be added.
270 
271 #include "llvm/Support/Windows/WindowsSupport.h"
272 
273 static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
274 {
275   // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported
276   // compilers and platforms, so we define it manually.
277   constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL;
278   switch (ExceptionInfo->ExceptionRecord->ExceptionCode)
279   {
280   case DBG_PRINTEXCEPTION_C:
281   case DbgPrintExceptionWideC:
282   case 0x406D1388:  // set debugger thread name
283     return EXCEPTION_CONTINUE_EXECUTION;
284   }
285 
286   // Lookup the current thread local recovery object.
287   const CrashRecoveryContextImpl *CRCI = getCurrentContext().get();
288 
289   if (!CRCI) {
290     // Something has gone horribly wrong, so let's just tell everyone
291     // to keep searching
292     CrashRecoveryContext::Disable();
293     return EXCEPTION_CONTINUE_SEARCH;
294   }
295 
296   // TODO: We can capture the stack backtrace here and store it on the
297   // implementation if we so choose.
298 
299   int RetCode = (int)ExceptionInfo->ExceptionRecord->ExceptionCode;
300   if ((RetCode & 0xF0000000) == 0xE0000000)
301     RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit
302 
303   // Handle the crash
304   const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
305       RetCode, reinterpret_cast<uintptr_t>(ExceptionInfo));
306 
307   // Note that we don't actually get here because HandleCrash calls
308   // longjmp, which means the HandleCrash function never returns.
309   llvm_unreachable("Handled the crash, should have longjmp'ed out of here");
310 }
311 
// Because the Enable and Disable calls are static, there may not be an Impl
// available, or even a current CrashRecoveryContext at all, when they run.
// The VEH registration handle is therefore kept in this thread-local slot
// instead. The stored value is either a non-NULL, valid VEH handle or NULL.
static sys::ThreadLocal<const void> sCurrentExceptionHandle;
318 
319 static void installExceptionOrSignalHandlers() {
320   // We can set up vectored exception handling now.  We will install our
321   // handler as the front of the list, though there's no assurances that
322   // it will remain at the front (another call could install itself before
323   // our handler).  This 1) isn't likely, and 2) shouldn't cause problems.
324   PVOID handle = ::AddVectoredExceptionHandler(1, ExceptionHandler);
325   sCurrentExceptionHandle.set(handle);
326 }
327 
328 static void uninstallExceptionOrSignalHandlers() {
329   PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get());
330   if (currentHandle) {
331     // Now we can remove the vectored exception handler from the chain
332     ::RemoveVectoredExceptionHandler(currentHandle);
333 
334     // Reset the handle in our thread-local set.
335     sCurrentExceptionHandle.set(NULL);
336   }
337 }
338 
339 #else // !_WIN32
340 
341 // Generic POSIX implementation.
342 //
343 // This implementation relies on synchronous signals being delivered to the
344 // current thread. We use a thread local object to keep track of the active
345 // crash recovery context, and install signal handlers to invoke HandleCrash on
346 // the active object.
347 //
348 // This implementation does not attempt to chain signal handlers in any
349 // reliable fashion -- if we get a signal outside of a crash recovery context we
350 // simply disable crash recovery and raise the signal again.
351 
352 #include <signal.h>
353 
// Signals for which the crash recovery handler gets installed.
static const int Signals[] =
    { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
// Number of entries in Signals.
static const unsigned NumSignals = std::size(Signals);
// Signal actions that were in effect before ours were installed, indexed like
// Signals; they are put back when the handlers are uninstalled.
static struct sigaction PrevActions[NumSignals];
358 
359 static void CrashRecoverySignalHandler(int Signal) {
360   // Lookup the current thread local recovery object.
361   const CrashRecoveryContextImpl *CRCI = getCurrentContext().get();
362 
363   if (!CRCI) {
364     // We didn't find a crash recovery context -- this means either we got a
365     // signal on a thread we didn't expect it on, the application got a signal
366     // outside of a crash recovery context, or something else went horribly
367     // wrong.
368     //
369     // Disable crash recovery and raise the signal again. The assumption here is
370     // that the enclosing application will terminate soon, and we won't want to
371     // attempt crash recovery again.
372     //
373     // This call of Disable isn't thread safe, but it doesn't actually matter.
374     CrashRecoveryContext::Disable();
375     raise(Signal);
376 
377     // The signal will be thrown once the signal mask is restored.
378     return;
379   }
380 
381   // Unblock the signal we received.
382   sigset_t SigMask;
383   sigemptyset(&SigMask);
384   sigaddset(&SigMask, Signal);
385   sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
386 
387   // Return the same error code as if the program crashed, as mentioned in the
388   // section "Exit Status for Commands":
389   // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
390   int RetCode = 128 + Signal;
391 
392   // Don't consider a broken pipe as a crash (see clang/lib/Driver/Driver.cpp)
393   if (Signal == SIGPIPE)
394     RetCode = EX_IOERR;
395 
396   if (CRCI)
397     const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(RetCode, Signal);
398 }
399 
400 static void installExceptionOrSignalHandlers() {
401   // Setup the signal handler.
402   struct sigaction Handler;
403   Handler.sa_handler = CrashRecoverySignalHandler;
404   Handler.sa_flags = 0;
405   sigemptyset(&Handler.sa_mask);
406 
407   for (unsigned i = 0; i != NumSignals; ++i) {
408     sigaction(Signals[i], &Handler, &PrevActions[i]);
409   }
410 }
411 
412 static void uninstallExceptionOrSignalHandlers() {
413   // Restore the previous signal handlers.
414   for (unsigned i = 0; i != NumSignals; ++i)
415     sigaction(Signals[i], &PrevActions[i], nullptr);
416 }
417 
418 #endif // !_WIN32
419 
420 bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
421   // If crash recovery is disabled, do nothing.
422   if (gCrashRecoveryEnabled) {
423     assert(!Impl && "Crash recovery context already initialized!");
424     CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
425     Impl = CRCI;
426 
427     CRCI->ValidJumpBuffer = true;
428     if (setjmp(CRCI->JumpBuffer) != 0) {
429       return false;
430     }
431   }
432 
433   Fn();
434   return true;
435 }
436 
437 #endif // !_MSC_VER
438 
439 [[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) {
440 #if defined(_WIN32)
441   // SEH and VEH
442   ::RaiseException(0xE0000000 | RetCode, 0, 0, NULL);
443 #else
444   // On Unix we don't need to raise an exception, we go directly to
445   // HandleCrash(), then longjmp will unwind the stack for us.
446   CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *)Impl;
447   assert(CRCI && "Crash recovery context never initialized!");
448   CRCI->HandleCrash(RetCode, 0 /*no sig num*/);
449 #endif
450   llvm_unreachable("Most likely setjmp wasn't called!");
451 }
452 
453 bool CrashRecoveryContext::isCrash(int RetCode) {
454 #if defined(_WIN32)
455   // On Windows, the high bits are reserved for kernel return codes. Values
456   // starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000
457   // and up are for "errors". In practice, both are interpreted as a
458   // non-continuable signal.
459   unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28;
460   if (Code != 0xC && Code != 8)
461     return false;
462 #else
463   // On Unix, signals are represented by return codes of 128 or higher.
464   // Exit code 128 is a reserved value and should not be raised as a signal.
465   if (RetCode <= 128)
466     return false;
467 #endif
468   return true;
469 }
470 
471 bool CrashRecoveryContext::throwIfCrash(int RetCode) {
472   if (!isCrash(RetCode))
473     return false;
474 #if defined(_WIN32)
475   ::RaiseException(RetCode, 0, 0, NULL);
476 #else
477   llvm::sys::unregisterHandlers();
478   raise(RetCode - 128);
479 #endif
480   return true;
481 }
482 
/// On Apple platforms, sets the calling thread's priority to
/// PRIO_DARWIN_BG; a no-op everywhere else.
// FIXME: Portability.
static void setThreadBackgroundPriority() {
#if defined(__APPLE__)
  setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG);
#endif
}
489 
/// On Apple platforms, reports whether getpriority(PRIO_DARWIN_THREAD, 0)
/// yields 1 for the calling thread; always false everywhere else.
static bool hasThreadBackgroundPriority() {
#if defined(__APPLE__)
  const int Priority = getpriority(PRIO_DARWIN_THREAD, 0);
  return Priority == 1;
#else
  return false;
#endif
}
497 
498 namespace {
// Argument/result bundle handed to RunSafelyOnThread_Dispatch through its
// void* parameter. It is filled positionally with a braced initializer, so
// the member order matters.
struct RunSafelyOnThreadInfo {
  // The function to run on the spawned thread.
  function_ref<void()> Fn;
  // The context whose RunSafely() is invoked on the spawned thread.
  CrashRecoveryContext *CRC;
  // Whether the spawned thread should switch to background priority first.
  bool UseBackgroundPriority;
  // Out-value: what RunSafely() returned on the spawned thread.
  bool Result;
};
505 } // namespace
506 
507 static void RunSafelyOnThread_Dispatch(void *UserData) {
508   RunSafelyOnThreadInfo *Info =
509     reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
510 
511   if (Info->UseBackgroundPriority)
512     setThreadBackgroundPriority();
513 
514   Info->Result = Info->CRC->RunSafely(Info->Fn);
515 }
516 bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
517                                              unsigned RequestedStackSize) {
518   bool UseBackgroundPriority = hasThreadBackgroundPriority();
519   RunSafelyOnThreadInfo Info = { Fn, this, UseBackgroundPriority, false };
520   llvm::thread Thread(RequestedStackSize == 0
521                           ? std::nullopt
522                           : std::optional<unsigned>(RequestedStackSize),
523                       RunSafelyOnThread_Dispatch, &Info);
524   Thread.join();
525 
526   if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl)
527     CRC->setSwitchedThread();
528   return Info.Result;
529 }
530