//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
#define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H

/* cxa_guard_impl.h - Implements the C++ runtime support for function local
 * static guards.
 * The layout of the guard object is the same across ARM and Itanium.
 *
 * The first "guard byte" (which is checked by the compiler) is set only upon
 * the completion of cxa release.
 *
 * The second "init byte" does the rest of the bookkeeping. It tracks if
 * initialization is complete or pending, and if there are waiting threads.
 *
 * If the guard variable is 64-bits and the platform supplies a 32-bit thread
 * identifier, it is used to detect recursive initialization. The thread ID of
 * the thread currently performing initialization is stored in the second word.
 *
 * Guard Object Layout:
 * -------------------------------------------------------------------------
 * | a: guard byte | a+1: init byte | a+2: unused ... | a+4: thread-id ... |
 * -------------------------------------------------------------------------
 *
 * Access Protocol:
 * For each implementation the guard byte is checked and set before accessing
 * the init byte.
 *
 * Overall Design:
 * The implementation was designed to allow each implementation to be tested
 * independent of the C++ runtime or platform support.
 *
 */

#include "__cxxabi_config.h"
#include "include/atomic_support.h"
#include <unistd.h>
#if defined(__has_include)
# if __has_include(<sys/syscall.h>)
# include <sys/syscall.h>
# endif
#endif

#include <cstdint>  // uint8_t, uint32_t, uint64_t used throughout
#include <cstring>  // std::memcpy in expected_value_for_futex
#include <limits.h> // INT_MAX in PlatformFutexWake
#include <stdlib.h>
#include <__threading_support>
#ifndef _LIBCXXABI_HAS_NO_THREADS
#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB)
#pragma comment(lib, "pthread")
#endif
#endif

// To make testing possible, this header is included from both cxa_guard.cpp
// and a number of tests.
//
// For this reason we place everything in an anonymous namespace -- even though
// we're in a header. We want the actual implementation and the tests to have
// unique definitions of the types in this header (since the tests may depend
// on function local statics).
//
// To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
// defined when including this file. Only `src/cxa_guard.cpp` should define
// the former.
#ifdef BUILDING_CXA_GUARD
# include "abort_message.h"
# define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
#elif defined(TESTING_CXA_GUARD)
# define ABORT_WITH_MESSAGE(...) ::abort()
#else
# error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
#endif

#if __has_feature(thread_sanitizer)
extern "C" void __tsan_acquire(void*);
extern "C" void __tsan_release(void*);
#else
#define __tsan_acquire(addr) ((void)0)
#define __tsan_release(addr) ((void)0)
#endif

namespace __cxxabiv1 {
// Use an anonymous namespace to ensure that the tests and actual implementation
// have unique definitions of these symbols.
namespace {

//===----------------------------------------------------------------------===//
// Misc Utilities
//===----------------------------------------------------------------------===//

/// LazyValue - Computes `Init()` on the first call to `get()` and caches the
/// result for subsequent calls. Used below to fetch the current thread ID at
/// most once per guard operation. Performs no synchronization of its own; each
/// instance is only ever used from the thread that created it.
template <class T, T(*Init)()>
struct LazyValue {
  LazyValue() : is_init(false) {}

  /// Return the cached value, computing it via `Init()` on first use.
  T& get() {
    if (!is_init) {
      value = Init();
      is_init = true;
    }
    return value;
  }
 private:
  T value;         // Valid only once `is_init` is true.
  bool is_init = false;
};

/// AtomicInt - A thin wrapper providing atomic operations over the raw
/// integer pointed to by `b`, implemented on top of the libc++
/// `__libcpp_atomic_*` primitives. The wrapper itself holds no state other
/// than the pointer, so it can be constructed on the fly around a guard byte.
template <class IntType>
class AtomicInt {
public:
  using MemoryOrder = std::__libcpp_atomic_order;

  explicit AtomicInt(IntType *b) : b(b) {}
  AtomicInt(AtomicInt const&) = delete;
  AtomicInt& operator=(AtomicInt const&) = delete;

  /// Atomically read `*b` with the given memory ordering.
  IntType load(MemoryOrder ord) {
    return std::__libcpp_atomic_load(b, ord);
  }
  /// Atomically write `val` to `*b` with the given memory ordering.
  void store(IntType val, MemoryOrder ord) {
    std::__libcpp_atomic_store(b, val, ord);
  }
  /// Atomically replace `*b` with `new_val`, returning the previous value.
  IntType exchange(IntType new_val, MemoryOrder ord) {
    return std::__libcpp_atomic_exchange(b, new_val, ord);
  }
  /// Strong compare-exchange on `*b`. On failure `*expected` is updated with
  /// the value actually observed.
  bool compare_exchange(IntType *expected, IntType desired, MemoryOrder ord_success, MemoryOrder ord_failure) {
    return std::__libcpp_atomic_compare_exchange(b, expected, desired, ord_success, ord_failure);
  }

private:
  IntType *b;
};

//===----------------------------------------------------------------------===//
// PlatformGetThreadID
//===----------------------------------------------------------------------===//

// PlatformThreadID is either a function returning a 32-bit identifier for the
// calling thread, or (when the platform provides none) a null function
// pointer. The null-pointer fallback lets PlatformSupportsThreadID() below
// answer the question at compile time.
#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(
      pthread_mach_thread_np(std::__libcpp_thread_get_current_id()));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(syscall(SYS_gettid));
}
#else
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif


/// Returns true when this platform can supply a thread identifier for
/// recursive-initialization detection.
constexpr bool PlatformSupportsThreadID() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  // The unary `+` decays a real function to a pointer; when PlatformThreadID
  // is the constexpr null function pointer the comparison yields false. The
  // pragma silences the tautological-compare warning in the function case.
  return +PlatformThreadID != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}

//===----------------------------------------------------------------------===//
// GuardBase
//===----------------------------------------------------------------------===//

enum class AcquireResult {
  INIT_IS_DONE,
  INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;

// States of the "init byte". COMPLETE_BIT marks a finished initialization;
// PENDING_BIT marks one in progress; WAITING_BIT marks that at least one
// other thread is blocked waiting for the pending initialization to resolve.
static constexpr uint8_t UNSET = 0;
static constexpr uint8_t COMPLETE_BIT = (1 << 0);
static constexpr uint8_t PENDING_BIT = (1 << 1);
static constexpr uint8_t WAITING_BIT = (1 << 2);

/// GuardObject - CRTP base holding the address arithmetic shared by every
/// implementation. It implements the guard-byte fast path of the
/// acquire/release/abort entry points and defers init-byte management to the
/// Derived class (acquire_init_byte / release_init_byte / abort_init_byte).
template <class Derived>
struct GuardObject {
  GuardObject() = delete;
  GuardObject(GuardObject const&) = delete;
  GuardObject& operator=(GuardObject const&) = delete;

  /// ARM-style 32-bit guard: no room for a thread ID.
  explicit GuardObject(uint32_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(nullptr) {}

  /// Itanium-style 64-bit guard: the second 32-bit word holds the thread ID.
  explicit GuardObject(uint64_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}

public:
  /// Implements __cxa_guard_acquire. Fast path: if the compiler-checked guard
  /// byte is already set, initialization is done; otherwise fall through to
  /// the implementation-specific init-byte protocol.
  AcquireResult cxa_guard_acquire() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    if (guard_byte.load(std::_AO_Acquire) != UNSET)
      return INIT_IS_DONE;
    return derived()->acquire_init_byte();
  }

  /// Implements __cxa_guard_release
  void cxa_guard_release() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    // Store complete first, so that when release wakes other folks, they see
    // it as having been completed.
    guard_byte.store(COMPLETE_BIT, std::_AO_Release);
    derived()->release_init_byte();
  }

  /// Implements __cxa_guard_abort
  void cxa_guard_abort() { derived()->abort_init_byte(); }

public:
  /// base_address - the address of the original guard object.
  void* const base_address;
  /// The address of the guard byte at offset 0.
  uint8_t* const guard_byte_address;
  /// The address of the byte used by the implementation during initialization.
  uint8_t* const init_byte_address;
  /// An optional address storing an identifier for the thread performing initialization.
  /// It's used to detect recursive initialization.
  uint32_t* const thread_id_address;

private:
  Derived* derived() { return static_cast<Derived*>(this); }
};

//===----------------------------------------------------------------------===//
// Single Threaded Implementation
//===----------------------------------------------------------------------===//

/// InitByteNoThreads - Init-byte protocol for single-threaded builds: plain
/// (non-atomic) reads and writes, with recursion detection via PENDING_BIT.
struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
  using GuardObject::GuardObject;

  AcquireResult acquire_init_byte() {
    if (*init_byte_address == COMPLETE_BIT)
      return INIT_IS_DONE;
    // A pending bit with no other thread around means we re-entered the
    // initializer of the same static — abort per the Itanium ABI.
    if (*init_byte_address & PENDING_BIT)
      ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
    *init_byte_address = PENDING_BIT;
    return INIT_IS_PENDING;
  }

  void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
  void abort_init_byte() { *init_byte_address = UNSET; }
};


//===----------------------------------------------------------------------===//
// Global Mutex Implementation
//===----------------------------------------------------------------------===//

struct LibcppMutex;
struct LibcppCondVar;
#ifndef _LIBCXXABI_HAS_NO_THREADS
/// LibcppMutex - Minimal wrapper over libc++'s native mutex. In this file a
/// truthy return from lock()/unlock() is treated as failure (see LockGuard).
struct LibcppMutex {
  LibcppMutex() = default;
  LibcppMutex(LibcppMutex const&) = delete;
  LibcppMutex& operator=(LibcppMutex const&) = delete;

  bool lock() { return std::__libcpp_mutex_lock(&mutex); }
  bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }

private:
  friend struct LibcppCondVar;  // wait() needs direct access to `mutex`.
  std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
};

/// LibcppCondVar - Minimal wrapper over libc++'s native condition variable.
/// As with LibcppMutex, a truthy return is treated by callers as failure.
struct LibcppCondVar {
  LibcppCondVar() = default;
  LibcppCondVar(LibcppCondVar const&) = delete;
  LibcppCondVar& operator=(LibcppCondVar const&) = delete;

  bool wait(LibcppMutex& mut) {
    return std::__libcpp_condvar_wait(&cond, &mut.mutex);
  }
  bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }

private:
  std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
};
#else
// Stubs so InitByteGlobalMutex still instantiates in no-threads builds.
struct LibcppMutex {};
struct LibcppCondVar {};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)


/// InitByteGlobalMutex - Init-byte protocol guarded by one process-global
/// mutex/condvar pair shared by ALL guard variables. Every transition of the
/// init byte happens with the mutex held; threads that find an
/// initialization pending set WAITING_BIT and block on the condvar, and the
/// initializing thread broadcasts on release/abort iff WAITING_BIT was set.
/// The mutex, condvar, and thread-ID source are template parameters so tests
/// can substitute their own.
template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
          uint32_t (*GetThreadID)() = PlatformThreadID>
struct InitByteGlobalMutex
    : GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
                                      GetThreadID>> {

  using BaseT = typename InitByteGlobalMutex::GuardObject;
  using BaseT::BaseT;

  // 32-bit (ARM) guards have no thread-id word, so recursion detection is off.
  explicit InitByteGlobalMutex(uint32_t *g)
      : BaseT(g), has_thread_id_support(false) {}
  explicit InitByteGlobalMutex(uint64_t *g)
      : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {}

public:
  AcquireResult acquire_init_byte() {
    LockGuard g("__cxa_guard_acquire");
    // Check for possible recursive initialization.
    if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
      if (*thread_id_address == current_thread_id.get())
        ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
    }

    // Wait until the pending bit is not set.
    while (*init_byte_address & PENDING_BIT) {
      // Advertise that someone is blocked so release/abort will broadcast.
      *init_byte_address |= WAITING_BIT;
      global_cond.wait(global_mutex);
    }

    // Either another thread completed the initialization while we waited...
    if (*init_byte_address == COMPLETE_BIT)
      return INIT_IS_DONE;

    // ...or it's ours to perform: record our thread ID (for recursion
    // detection) and claim the pending state.
    if (has_thread_id_support)
      *thread_id_address = current_thread_id.get();

    *init_byte_address = PENDING_BIT;
    return INIT_IS_PENDING;
  }

  void release_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_release");
      has_waiting = *init_byte_address & WAITING_BIT;
      *init_byte_address = COMPLETE_BIT;
    }
    // Broadcast outside the critical section; waiters re-check under the lock.
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
      }
    }
  }

  void abort_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_abort");
      if (has_thread_id_support)
        *thread_id_address = 0;
      has_waiting = *init_byte_address & WAITING_BIT;
      // Reset to UNSET (clearing WAITING_BIT too) so a waiter can retry.
      *init_byte_address = UNSET;
    }
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
      }
    }
  }

private:
  using BaseT::init_byte_address;
  using BaseT::thread_id_address;
  const bool has_thread_id_support;
  LazyValue<uint32_t, GetThreadID> current_thread_id;

private:
  /// LockGuard - RAII ownership of the global mutex; aborts the process with
  /// a message naming the calling API if lock or unlock reports failure.
  struct LockGuard {
    LockGuard() = delete;
    LockGuard(LockGuard const&) = delete;
    LockGuard& operator=(LockGuard const&) = delete;

    explicit LockGuard(const char* calling_func)
        : calling_func(calling_func) {
      if (global_mutex.lock())
        ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func);
    }

    ~LockGuard() {
      if (global_mutex.unlock())
        ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func);
    }

  private:
    const char* const calling_func;
  };
};

//===----------------------------------------------------------------------===//
// Futex Implementation
//===----------------------------------------------------------------------===//

// Futex syscall wrappers. When SYS_futex is unavailable these are null
// function pointers, which PlatformSupportsFutex() detects at compile time.
#if defined(SYS_futex)
/// Block until `*addr` no longer holds `expect` (or a wake/spurious return).
void PlatformFutexWait(int* addr, int expect) {
  constexpr int WAIT = 0;  // FUTEX_WAIT
  syscall(SYS_futex, addr, WAIT, expect, 0);
  // Pair with the __tsan_release in PlatformFutexWake so TSan sees the
  // happens-before edge the futex provides.
  __tsan_acquire(addr);
}
/// Wake every waiter blocked on `addr`.
void PlatformFutexWake(int* addr) {
  constexpr int WAKE = 1;  // FUTEX_WAKE
  __tsan_release(addr);
  syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif

/// Returns true when futex wait/wake is available on this platform.
constexpr bool PlatformSupportsFutex() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  return +PlatformFutexWait != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}

/// InitByteFutex - Manages initialization using atomics and the futex syscall
/// for waiting and waking. The init byte is driven entirely by lock-free
/// compare-exchange/exchange; the futex is only used to sleep and wake.
template <void (*Wait)(int*, int) = PlatformFutexWait,
          void (*Wake)(int*) = PlatformFutexWake,
          uint32_t (*GetThreadIDArg)() = PlatformThreadID>
struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
  using BaseT = typename InitByteFutex::GuardObject;

  /// ARM Constructor (32-bit guard: no thread-id word, recursion detection off)
  explicit InitByteFutex(uint32_t *g) : BaseT(g),
    init_byte(this->init_byte_address),
    has_thread_id_support(this->thread_id_address && GetThreadIDArg),
    thread_id(this->thread_id_address) {}

  /// Itanium Constructor (64-bit guard: second word carries the thread id)
  explicit InitByteFutex(uint64_t *g) : BaseT(g),
    init_byte(this->init_byte_address),
    has_thread_id_support(this->thread_id_address && GetThreadIDArg),
    thread_id(this->thread_id_address) {}

public:
  AcquireResult acquire_init_byte() {
    while (true) {
      uint8_t last_val = UNSET;
      // Try to claim UNSET -> PENDING: success means we own the init.
      if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
                                     std::_AO_Acquire)) {
        if (has_thread_id_support) {
          thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
        }
        return INIT_IS_PENDING;
      }

      if (last_val == COMPLETE_BIT)
        return INIT_IS_DONE;

      if (last_val & PENDING_BIT) {

        // Check for recursive initialization
        if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
            ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
        }

        if ((last_val & WAITING_BIT) == 0) {
          // This compare exchange can fail for several reasons
          // (1) another thread finished the whole thing before we got here
          // (2) another thread set the waiting bit we were trying to set
          // (3) another thread had an exception and failed to finish
          //
          // NOTE(review): `_AO_Release` as the *failure* ordering here looks
          // suspect — the C++ memory model does not permit release (or
          // acq_rel) as a compare-exchange failure order. Confirm the
          // intended ordering (acquire would be the conventional choice).
          if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
                                          std::_AO_Acq_Rel, std::_AO_Release)) {
            // (1) success, via someone else's work!
            if (last_val == COMPLETE_BIT)
              return INIT_IS_DONE;

            // (3) someone else, bailed on doing the work, retry from the start!
            if (last_val == UNSET)
              continue;

            // (2) the waiting bit got set, so we are happy to keep waiting
          }
        }
        // Sleep until the byte leaves the PENDING|WAITING state, then re-run
        // the loop to observe the new state.
        wait_on_initialization();
      }
    }
  }

  void release_init_byte() {
    uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
    // Only pay for the wake syscall if someone advertised they were asleep.
    if (old & WAITING_BIT)
      wake_all();
  }

  void abort_init_byte() {
    if (has_thread_id_support)
      thread_id.store(0, std::_AO_Relaxed);

    // Reset to UNSET (0) so one of the woken threads can retry the init.
    uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
    if (old & WAITING_BIT)
      wake_all();
  }

private:
  /// Use the futex to wait on the current guard variable. Futex expects a
  /// 32-bit 4-byte aligned address as the first argument, so we have to use
  /// the base address of the guard variable (not the init byte).
  void wait_on_initialization() {
    Wait(static_cast<int*>(this->base_address),
         expected_value_for_futex(PENDING_BIT | WAITING_BIT));
  }
  void wake_all() { Wake(static_cast<int*>(this->base_address)); }

private:
  AtomicInt<uint8_t> init_byte;

  const bool has_thread_id_support;
  // Unsafe to use unless has_thread_id_support
  AtomicInt<uint32_t> thread_id;
  LazyValue<uint32_t, GetThreadIDArg> current_thread_id;

  /// Create the expected integer value for futex `wait(int* addr, int expected)`.
  /// We pass the base address as the first argument, so this function creates
  /// a zero-initialized integer with `b` copied at the correct offset — the
  /// init byte lives at offset 1 within the guard word (see GuardObject).
  static int expected_value_for_futex(uint8_t b) {
    int dest_val = 0;
    std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
    return dest_val;
  }

  static_assert(Wait != nullptr && Wake != nullptr, "");
};

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

/// GlobalStatic - Provides a statically-initialized global instance of T,
/// used to supply the global mutex/condvar template arguments below.
template <class T>
struct GlobalStatic {
  static T instance;
};
template <class T>
_LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};

// Tags naming the three guard strategies implemented above.
enum class Implementation {
  NoThreads,
  GlobalLock,
  Futex
};

/// SelectImplementation - Maps an Implementation tag to its guard class.
template <Implementation Impl>
struct SelectImplementation;

template <>
struct SelectImplementation<Implementation::NoThreads> {
  using type = InitByteNoThreads;
};

template <>
struct SelectImplementation<Implementation::GlobalLock> {
  using type = InitByteGlobalMutex<
      LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
      GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
};

template <>
struct SelectImplementation<Implementation::Futex> {
  using type =
      InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
};
572 // TODO(EricWF): We should prefer the futex implementation when available. But 573 // it should be done in a separate step from adding the implementation. 574 constexpr Implementation CurrentImplementation = 575 #if defined(_LIBCXXABI_HAS_NO_THREADS) 576 Implementation::NoThreads; 577 #elif defined(_LIBCXXABI_USE_FUTEX) 578 Implementation::Futex; 579 #else 580 Implementation::GlobalLock; 581 #endif 582 583 static_assert(CurrentImplementation != Implementation::Futex 584 || PlatformSupportsFutex(), "Futex selected but not supported"); 585 586 using SelectedImplementation = 587 SelectImplementation<CurrentImplementation>::type; 588 589 } // end namespace 590 } // end namespace __cxxabiv1 591 592 #endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H 593