xref: /openbsd-src/gnu/llvm/libcxxabi/src/cxa_guard_impl.h (revision f1dd7b858388b4a23f4f67a4957ec5ff656ebbe8)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
9 #define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
10 
11 /* cxa_guard_impl.h - Implements the C++ runtime support for function local
12  * static guards.
13  * The layout of the guard object is the same across ARM and Itanium.
14  *
15  * The first "guard byte" (which is checked by the compiler) is set only upon
16  * the completion of cxa release.
17  *
18  * The second "init byte" does the rest of the bookkeeping. It tracks if
19  * initialization is complete or pending, and if there are waiting threads.
20  *
21  * If the guard variable is 64-bits and the platform supplies a 32-bit thread
22  * identifier, it is used to detect recursive initialization. The thread ID of
23  * the thread currently performing initialization is stored in the second word.
24  *
25  *  Guard Object Layout:
26  * -------------------------------------------------------------------------
27  * |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
28  * ------------------------------------------------------------------------
29  *
30  *  Access Protocol:
31  *    For each implementation the guard byte is checked and set before accessing
32  *    the init byte.
33  *
34  *  Overall Design:
35  *    The implementation was designed to allow each implementation to be tested
36  * independently of the C++ runtime or platform support.
37  *
38  */
39 
#include "__cxxabi_config.h"
#include "include/atomic_support.h"
#include <unistd.h>
#if defined(__has_include)
# if __has_include(<sys/syscall.h>)
#   include <sys/syscall.h>
# endif
#endif

#include <climits>
#include <cstring>
#include <stdlib.h>
#include <__threading_support>
51 #ifndef _LIBCXXABI_HAS_NO_THREADS
52 #if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB)
53 #pragma comment(lib, "pthread")
54 #endif
55 #endif
56 
57 // To make testing possible, this header is included from both cxa_guard.cpp
58 // and a number of tests.
59 //
60 // For this reason we place everything in an anonymous namespace -- even though
61 // we're in a header. We want the actual implementation and the tests to have
62 // unique definitions of the types in this header (since the tests may depend
63 // on function local statics).
64 //
65 // To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
66 // defined when including this file. Only `src/cxa_guard.cpp` should define
67 // the former.
68 #ifdef BUILDING_CXA_GUARD
69 # include "abort_message.h"
70 # define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
71 #elif defined(TESTING_CXA_GUARD)
72 # define ABORT_WITH_MESSAGE(...) ::abort()
73 #else
74 # error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
75 #endif
76 
77 #if __has_feature(thread_sanitizer)
78 extern "C" void __tsan_acquire(void*);
79 extern "C" void __tsan_release(void*);
80 #else
81 #define __tsan_acquire(addr) ((void)0)
82 #define __tsan_release(addr) ((void)0)
83 #endif
84 
85 namespace __cxxabiv1 {
86 // Use an anonymous namespace to ensure that the tests and actual implementation
87 // have unique definitions of these symbols.
88 namespace {
89 
90 //===----------------------------------------------------------------------===//
91 //                          Misc Utilities
92 //===----------------------------------------------------------------------===//
93 
/// LazyValue - Computes a value via `Init` on first access and caches it for
/// every subsequent call. Used so the current thread ID is fetched at most
/// once per guard operation.
template <class T, T(*Init)()>
struct LazyValue {
  LazyValue() : is_init(false) {}

  /// Returns the cached value, invoking Init() exactly once on first use.
  T& get() {
    if (is_init)
      return value;
    value = Init();
    is_init = true;
    return value;
  }

 private:
  T value;
  bool is_init = false;
};
109 
110 template <class IntType>
111 class AtomicInt {
112 public:
113   using MemoryOrder = std::__libcpp_atomic_order;
114 
115   explicit AtomicInt(IntType *b) : b(b) {}
116   AtomicInt(AtomicInt const&) = delete;
117   AtomicInt& operator=(AtomicInt const&) = delete;
118 
119   IntType load(MemoryOrder ord) {
120     return std::__libcpp_atomic_load(b, ord);
121   }
122   void store(IntType val, MemoryOrder ord) {
123     std::__libcpp_atomic_store(b, val, ord);
124   }
125   IntType exchange(IntType new_val, MemoryOrder ord) {
126     return std::__libcpp_atomic_exchange(b, new_val, ord);
127   }
128   bool compare_exchange(IntType *expected, IntType desired, MemoryOrder ord_success, MemoryOrder ord_failure) {
129     return std::__libcpp_atomic_compare_exchange(b, expected, desired, ord_success, ord_failure);
130   }
131 
132 private:
133   IntType *b;
134 };
135 
136 //===----------------------------------------------------------------------===//
137 //                       PlatformGetThreadID
138 //===----------------------------------------------------------------------===//
139 
#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
// Returns a 32-bit identifier for the calling thread, used to detect
// recursive initialization. On Apple platforms the Mach port of the current
// pthread serves as the identifier.
uint32_t PlatformThreadID() {
  static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(
      pthread_mach_thread_np(std::__libcpp_thread_get_current_id()));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
// On platforms exposing SYS_gettid (e.g. Linux) the kernel thread ID is used.
// The syscall is invoked directly; a gettid() libc wrapper is not assumed.
uint32_t PlatformThreadID() {
  static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(syscall(SYS_gettid));
}
#else
// No usable thread ID on this platform: the null function pointer signals
// that recursive-initialization detection is unavailable.
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
154 
155 
// Returns true when PlatformThreadID is available, i.e. when
// recursive-initialization detection can be enabled. The pragmas silence
// clang's tautological-comparison warning for the configurations where
// PlatformThreadID is a real function (whose address is never null).
constexpr bool PlatformSupportsThreadID() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  return +PlatformThreadID != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}
166 
167 //===----------------------------------------------------------------------===//
168 //                          GuardBase
169 //===----------------------------------------------------------------------===//
170 
// Result of acquiring the init byte: either initialization already completed
// (the caller must skip it) or it is now pending and owned by this thread.
enum class AcquireResult {
  INIT_IS_DONE,
  INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;

// States of the "init byte" (second byte of the guard object).
static constexpr uint8_t UNSET = 0;                // untouched, or a previous attempt aborted
static constexpr uint8_t COMPLETE_BIT = (1 << 0);  // initialization finished successfully
static constexpr uint8_t PENDING_BIT = (1 << 1);   // a thread is currently initializing
static constexpr uint8_t WAITING_BIT = (1 << 2);   // at least one thread is blocked waiting
182 
/// GuardObject - CRTP base shared by every guard implementation. It splits
/// the guard variable into its guard byte, init byte, and (for 64-bit
/// guards) thread-id word, and delegates the init-byte protocol to Derived.
template <class Derived>
struct GuardObject {
  GuardObject() = delete;
  GuardObject(GuardObject const&) = delete;
  GuardObject& operator=(GuardObject const&) = delete;

  // ARM layout: 32-bit guard, so there is no room for a thread-id word.
  explicit GuardObject(uint32_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(nullptr) {}

  // Itanium layout: 64-bit guard; the second 32-bit word may store the
  // initializing thread's ID.
  explicit GuardObject(uint64_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}

public:
  /// Implements __cxa_guard_acquire. The acquire load pairs with the release
  /// store in cxa_guard_release, so a thread that observes a non-zero guard
  /// byte also observes the effects of the completed initialization.
  AcquireResult cxa_guard_acquire() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    if (guard_byte.load(std::_AO_Acquire) != UNSET)
      return INIT_IS_DONE;
    return derived()->acquire_init_byte();
  }

  /// Implements __cxa_guard_release
  void cxa_guard_release() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    // Store complete first, so that when release wakes other folks, they see
    // it as having been completed.
    guard_byte.store(COMPLETE_BIT, std::_AO_Release);
    derived()->release_init_byte();
  }

  /// Implements __cxa_guard_abort
  void cxa_guard_abort() { derived()->abort_init_byte(); }

public:
  /// base_address - the address of the original guard object.
  void* const base_address;
  /// The address of the guard byte at offset 0.
  uint8_t* const guard_byte_address;
  /// The address of the byte used by the implementation during initialization.
  uint8_t* const init_byte_address;
  /// An optional address storing an identifier for the thread performing initialization.
  /// It's used to detect recursive initialization.
  uint32_t* const thread_id_address;

private:
  Derived* derived() { return static_cast<Derived*>(this); }
};
234 
235 //===----------------------------------------------------------------------===//
236 //                    Single Threaded Implementation
237 //===----------------------------------------------------------------------===//
238 
239 struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
240   using GuardObject::GuardObject;
241 
242   AcquireResult acquire_init_byte() {
243     if (*init_byte_address == COMPLETE_BIT)
244       return INIT_IS_DONE;
245     if (*init_byte_address & PENDING_BIT)
246       ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
247     *init_byte_address = PENDING_BIT;
248     return INIT_IS_PENDING;
249   }
250 
251   void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
252   void abort_init_byte() { *init_byte_address = UNSET; }
253 };
254 
255 
256 //===----------------------------------------------------------------------===//
257 //                     Global Mutex Implementation
258 //===----------------------------------------------------------------------===//
259 
struct LibcppMutex;
struct LibcppCondVar;

#ifndef _LIBCXXABI_HAS_NO_THREADS
// Thin wrappers over libc++'s threading primitives. lock(), unlock(),
// wait(), and broadcast() forward the underlying result, where a truthy
// return indicates failure (see LockGuard below, which aborts on it).
struct LibcppMutex {
  LibcppMutex() = default;
  LibcppMutex(LibcppMutex const&) = delete;
  LibcppMutex& operator=(LibcppMutex const&) = delete;

  bool lock() { return std::__libcpp_mutex_lock(&mutex); }
  bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }

private:
  // LibcppCondVar::wait needs direct access to the underlying mutex handle.
  friend struct LibcppCondVar;
  std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
};

struct LibcppCondVar {
  LibcppCondVar() = default;
  LibcppCondVar(LibcppCondVar const&) = delete;
  LibcppCondVar& operator=(LibcppCondVar const&) = delete;

  // Atomically releases `mut` and blocks until the condition is signaled;
  // `mut` is re-acquired before returning.
  bool wait(LibcppMutex& mut) {
    return std::__libcpp_condvar_wait(&cond, &mut.mutex);
  }
  bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }

private:
  std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
};
#else
// Empty placeholders keep the global-mutex implementation well-formed (but
// unused) when the library is built without thread support.
struct LibcppMutex {};
struct LibcppCondVar {};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)
294 
295 
/// InitByteGlobalMutex - Guard implementation that serializes every guard
/// object through one process-wide mutex/condition-variable pair. The mutex
/// and condvar are template references so tests can substitute their own.
template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
          uint32_t (*GetThreadID)() = PlatformThreadID>
struct InitByteGlobalMutex
    : GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
                                    GetThreadID>> {

  using BaseT = typename InitByteGlobalMutex::GuardObject;
  using BaseT::BaseT;

  // ARM constructor (32-bit guard): no thread-id storage, so recursion
  // detection is disabled.
  explicit InitByteGlobalMutex(uint32_t *g)
    : BaseT(g), has_thread_id_support(false) {}
  // Itanium constructor (64-bit guard): recursion detection is enabled when
  // the platform can supply a thread ID.
  explicit InitByteGlobalMutex(uint64_t *g)
    : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {}

public:
  // Called after the guard byte was observed unset. Under the global lock:
  // detect recursive initialization, wait out any in-progress initializer,
  // then either report completion or claim the init byte for this thread.
  AcquireResult acquire_init_byte() {
    LockGuard g("__cxa_guard_acquire");
    // Check for possible recursive initialization.
    if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
      if (*thread_id_address == current_thread_id.get())
       ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
    }

    // Wait until the pending bit is not set.
    while (*init_byte_address & PENDING_BIT) {
      // Record that someone is blocked so release/abort know to broadcast.
      *init_byte_address |= WAITING_BIT;
      global_cond.wait(global_mutex);
    }

    if (*init_byte_address == COMPLETE_BIT)
      return INIT_IS_DONE;

    if (has_thread_id_support)
      *thread_id_address = current_thread_id.get();

    *init_byte_address = PENDING_BIT;
    return INIT_IS_PENDING;
  }

  // Publish completion. The waiting bit is sampled and the init byte updated
  // under the lock; the broadcast happens after unlocking, and a broadcast
  // failure (truthy return) aborts the process.
  void release_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_release");
      has_waiting = *init_byte_address & WAITING_BIT;
      *init_byte_address = COMPLETE_BIT;
    }
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
      }
    }
  }

  // Roll back a failed initialization: clear the stored thread ID and reset
  // the init byte to UNSET so a waiter may retry, then wake everyone.
  void abort_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_abort");
      if (has_thread_id_support)
        *thread_id_address = 0;
      has_waiting = *init_byte_address & WAITING_BIT;
      *init_byte_address = UNSET;
    }
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
      }
    }
  }

private:
  using BaseT::init_byte_address;
  using BaseT::thread_id_address;
  const bool has_thread_id_support;
  LazyValue<uint32_t, GetThreadID> current_thread_id;

private:
  // Scoped lock on the global mutex; aborts (rather than returning an
  // error) if the mutex cannot be acquired or released.
  struct LockGuard {
    LockGuard() = delete;
    LockGuard(LockGuard const&) = delete;
    LockGuard& operator=(LockGuard const&) = delete;

    explicit LockGuard(const char* calling_func)
        : calling_func(calling_func)  {
      if (global_mutex.lock())
        ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func);
    }

    ~LockGuard() {
      if (global_mutex.unlock())
        ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func);
    }

  private:
    const char* const calling_func;
  };
};
392 
393 //===----------------------------------------------------------------------===//
394 //                         Futex Implementation
395 //===----------------------------------------------------------------------===//
396 
#if defined(SYS_futex)
// Blocks the calling thread on `addr` until woken, but only while `*addr`
// still equals `expect` (FUTEX_WAIT == 0). The trailing 0 is a null timeout,
// i.e. wait indefinitely. The TSAN annotation pairs with the release in
// PlatformFutexWake below.
void PlatformFutexWait(int* addr, int expect) {
  constexpr int WAIT = 0;
  syscall(SYS_futex, addr, WAIT, expect, 0);
  __tsan_acquire(addr);
}
// Wakes every thread blocked on `addr` (FUTEX_WAKE == 1, up to INT_MAX
// waiters, i.e. all of them).
void PlatformFutexWake(int* addr) {
  constexpr int WAKE = 1;
  __tsan_release(addr);
  syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
// Null pointers signal that no futex implementation is available; see
// PlatformSupportsFutex below.
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif
412 
// Returns true when the futex syscall wrappers above are available. The
// pragmas silence clang's tautological-comparison warning for the branch
// where PlatformFutexWait is a real function (whose address is never null).
constexpr bool PlatformSupportsFutex() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  return +PlatformFutexWait != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}
423 
424 /// InitByteFutex - Manages initialization using atomics and the futex syscall
425 /// for waiting and waking.
426 template <void (*Wait)(int*, int) = PlatformFutexWait,
427           void (*Wake)(int*) = PlatformFutexWake,
428           uint32_t (*GetThreadIDArg)() = PlatformThreadID>
429 struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
430   using BaseT = typename InitByteFutex::GuardObject;
431 
432   /// ARM Constructor
433   explicit InitByteFutex(uint32_t *g) : BaseT(g),
434     init_byte(this->init_byte_address),
435     has_thread_id_support(this->thread_id_address && GetThreadIDArg),
436     thread_id(this->thread_id_address) {}
437 
438   /// Itanium Constructor
439   explicit InitByteFutex(uint64_t *g) : BaseT(g),
440     init_byte(this->init_byte_address),
441     has_thread_id_support(this->thread_id_address && GetThreadIDArg),
442     thread_id(this->thread_id_address) {}
443 
444 public:
445   AcquireResult acquire_init_byte() {
446     while (true) {
447       uint8_t last_val = UNSET;
448       if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
449                                      std::_AO_Acquire)) {
450         if (has_thread_id_support) {
451           thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
452         }
453         return INIT_IS_PENDING;
454       }
455 
456       if (last_val == COMPLETE_BIT)
457         return INIT_IS_DONE;
458 
459       if (last_val & PENDING_BIT) {
460 
461         // Check for recursive initialization
462         if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
463             ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
464         }
465 
466         if ((last_val & WAITING_BIT) == 0) {
467           // This compare exchange can fail for several reasons
468           // (1) another thread finished the whole thing before we got here
469           // (2) another thread set the waiting bit we were trying to thread
470           // (3) another thread had an exception and failed to finish
471           if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
472                                           std::_AO_Acq_Rel, std::_AO_Release)) {
473             // (1) success, via someone else's work!
474             if (last_val == COMPLETE_BIT)
475               return INIT_IS_DONE;
476 
477             // (3) someone else, bailed on doing the work, retry from the start!
478             if (last_val == UNSET)
479               continue;
480 
481             // (2) the waiting bit got set, so we are happy to keep waiting
482           }
483         }
484         wait_on_initialization();
485       }
486     }
487   }
488 
489   void release_init_byte() {
490     uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
491     if (old & WAITING_BIT)
492       wake_all();
493   }
494 
495   void abort_init_byte() {
496     if (has_thread_id_support)
497       thread_id.store(0, std::_AO_Relaxed);
498 
499     uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
500     if (old & WAITING_BIT)
501       wake_all();
502   }
503 
504 private:
505   /// Use the futex to wait on the current guard variable. Futex expects a
506   /// 32-bit 4-byte aligned address as the first argument, so we have to use use
507   /// the base address of the guard variable (not the init byte).
508   void wait_on_initialization() {
509     Wait(static_cast<int*>(this->base_address),
510          expected_value_for_futex(PENDING_BIT | WAITING_BIT));
511   }
512   void wake_all() { Wake(static_cast<int*>(this->base_address)); }
513 
514 private:
515   AtomicInt<uint8_t> init_byte;
516 
517   const bool has_thread_id_support;
518   // Unsafe to use unless has_thread_id_support
519   AtomicInt<uint32_t> thread_id;
520   LazyValue<uint32_t, GetThreadIDArg> current_thread_id;
521 
522   /// Create the expected integer value for futex `wait(int* addr, int expected)`.
523   /// We pass the base address as the first argument, So this function creates
524   /// an zero-initialized integer  with `b` copied at the correct offset.
525   static int expected_value_for_futex(uint8_t b) {
526     int dest_val = 0;
527     std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
528     return dest_val;
529   }
530 
531   static_assert(Wait != nullptr && Wake != nullptr, "");
532 };
533 
534 //===----------------------------------------------------------------------===//
535 //
536 //===----------------------------------------------------------------------===//
537 
// GlobalStatic - Provides a single program-wide instance of T with static
// storage duration. _LIBCPP_SAFE_STATIC requires constant initialization, so
// the instance needs no runtime guard of its own (which would be circular).
template <class T>
struct GlobalStatic {
  static T instance;
};
template <class T>
_LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};
544 
// Tag for each available guard implementation.
enum class Implementation {
  NoThreads,
  GlobalLock,
  Futex
};

// Maps an Implementation tag to its concrete guard type.
template <Implementation Impl>
struct SelectImplementation;

template <>
struct SelectImplementation<Implementation::NoThreads> {
  using type = InitByteNoThreads;
};

template <>
struct SelectImplementation<Implementation::GlobalLock> {
  using type = InitByteGlobalMutex<
      LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
      GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
};

template <>
struct SelectImplementation<Implementation::Futex> {
  using type =
      InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
};

// TODO(EricWF): We should prefer the futex implementation when available. But
// it should be done in a separate step from adding the implementation.
constexpr Implementation CurrentImplementation =
#if defined(_LIBCXXABI_HAS_NO_THREADS)
    Implementation::NoThreads;
#elif defined(_LIBCXXABI_USE_FUTEX)
    Implementation::Futex;
#else
   Implementation::GlobalLock;
#endif

// Refuse to build the futex implementation on a platform lacking SYS_futex.
static_assert(CurrentImplementation != Implementation::Futex
           || PlatformSupportsFutex(), "Futex selected but not supported");

// The implementation actually compiled into __cxa_guard_{acquire,release,abort}.
using SelectedImplementation =
    SelectImplementation<CurrentImplementation>::type;
588 
589 } // end namespace
590 } // end namespace __cxxabiv1
591 
592 #endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
593