====================
``<atomic>`` Design
====================

There were originally 3 designs under consideration. They differ in where most
of the implementation work is done. The functionality exposed to the customer
should be identical (and conforming) for all three designs.


Design A: Minimal work for the library
======================================
The compiler supplies all of the intrinsics as described below. This list of
intrinsics roughly parallels the requirements of the C and C++ atomics proposals.
The C and C++ library implementations simply drop through to these intrinsics.
Anything the platform does not support in hardware, the compiler
arranges for a (compiler-rt) library call to be made which will do the job with
a mutex, and in this case ignoring the memory ordering parameter (effectively
implementing ``memory_order_seq_cst``).

Ultimate efficiency is preferred over run time error checking. Undefined
behavior is acceptable when the inputs do not conform as defined below.

.. code-block:: cpp

  // In every intrinsic signature below, type* atomic_obj may be a pointer to a
  // volatile-qualified type. Memory ordering values map to the following meanings:
  //   memory_order_relaxed == 0
  //   memory_order_consume == 1
  //   memory_order_acquire == 2
  //   memory_order_release == 3
  //   memory_order_acq_rel == 4
  //   memory_order_seq_cst == 5

  // type must be trivially copyable
  // type represents a "type argument"
  bool __atomic_is_lock_free(type);

  // type must be trivially copyable
  // Behavior is defined for mem_ord = 0, 1, 2, 5
  type __atomic_load(const type* atomic_obj, int mem_ord);

  // type must be trivially copyable
  // Behavior is defined for mem_ord = 0, 3, 5
  void __atomic_store(type* atomic_obj, type desired, int mem_ord);

  // type must be trivially copyable
  // Behavior is defined for mem_ord = [0 ... 5]
  type __atomic_exchange(type* atomic_obj, type desired, int mem_ord);

  // type must be trivially copyable
  // Behavior is defined for mem_success = [0 ... 5],
  //   mem_failure <= mem_success
  //   mem_failure != 3
  //   mem_failure != 4
  bool __atomic_compare_exchange_strong(type* atomic_obj,
                                        type* expected, type desired,
                                        int mem_success, int mem_failure);

  // type must be trivially copyable
  // Behavior is defined for mem_success = [0 ... 5],
  //   mem_failure <= mem_success
  //   mem_failure != 3
  //   mem_failure != 4
  bool __atomic_compare_exchange_weak(type* atomic_obj,
                                      type* expected, type desired,
                                      int mem_success, int mem_failure);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  // Behavior is defined for mem_ord = [0 ... 5]
  type __atomic_fetch_add(type* atomic_obj, type operand, int mem_ord);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  // Behavior is defined for mem_ord = [0 ... 5]
  type __atomic_fetch_sub(type* atomic_obj, type operand, int mem_ord);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  // Behavior is defined for mem_ord = [0 ... 5]
  type __atomic_fetch_and(type* atomic_obj, type operand, int mem_ord);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  // Behavior is defined for mem_ord = [0 ... 5]
  type __atomic_fetch_or(type* atomic_obj, type operand, int mem_ord);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  // Behavior is defined for mem_ord = [0 ... 5]
  type __atomic_fetch_xor(type* atomic_obj, type operand, int mem_ord);

  // Behavior is defined for mem_ord = [0 ... 5]
  void* __atomic_fetch_add(void** atomic_obj, ptrdiff_t operand, int mem_ord);
  void* __atomic_fetch_sub(void** atomic_obj, ptrdiff_t operand, int mem_ord);

  // Behavior is defined for mem_ord = [0 ... 5]
  void __atomic_thread_fence(int mem_ord);
  void __atomic_signal_fence(int mem_ord);

If desired the intrinsics taking a single ``mem_ord`` parameter can default
this argument to 5.

If desired the intrinsics taking two ordering parameters can default ``mem_success``
to 5, and ``mem_failure`` to ``translate_memory_order(mem_success)`` where
``translate_memory_order(mem_success)`` is defined as:

.. code-block:: cpp

  int translate_memory_order(int o) {
    switch (o) {
    case 4:
      return 2;
    case 3:
      return 0;
    }
    return o;
  }

Below are representative C++ implementations of all of the operations. Their
purpose is to document the desired semantics of each operation, assuming
``memory_order_seq_cst``. This is essentially the code that will be called
if the front end calls out to compiler-rt.

.. code-block:: cpp

  template <class T>
  T __atomic_load(T const volatile* obj) {
    unique_lock<mutex> _(some_mutex);
    return *obj;
  }

  template <class T>
  void __atomic_store(T volatile* obj, T desr) {
    unique_lock<mutex> _(some_mutex);
    *obj = desr;
  }

  template <class T>
  T __atomic_exchange(T volatile* obj, T desr) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj = desr;
    return r;
  }

  template <class T>
  bool __atomic_compare_exchange_strong(T volatile* obj, T* exp, T desr) {
    unique_lock<mutex> _(some_mutex);
    if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0) // if (*obj == *exp)
    {
      std::memcpy(const_cast<T*>(obj), &desr, sizeof(T)); // *obj = desr;
      return true;
    }
    std::memcpy(exp, const_cast<T*>(obj), sizeof(T)); // *exp = *obj;
    return false;
  }

  // May spuriously return false (even if *obj == *exp)
  template <class T>
  bool __atomic_compare_exchange_weak(T volatile* obj, T* exp, T desr) {
    unique_lock<mutex> _(some_mutex);
    if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0) // if (*obj == *exp)
    {
      std::memcpy(const_cast<T*>(obj), &desr, sizeof(T)); // *obj = desr;
      return true;
    }
    std::memcpy(exp, const_cast<T*>(obj), sizeof(T)); // *exp = *obj;
    return false;
  }

  template <class T>
  T __atomic_fetch_add(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj += operand;
    return r;
  }

  template <class T>
  T __atomic_fetch_sub(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj -= operand;
    return r;
  }

  template <class T>
  T __atomic_fetch_and(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj &= operand;
    return r;
  }

  template <class T>
  T __atomic_fetch_or(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj |= operand;
    return r;
  }

  template <class T>
  T __atomic_fetch_xor(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj ^= operand;
    return r;
  }

  void* __atomic_fetch_add(void* volatile* obj, ptrdiff_t operand) {
    unique_lock<mutex> _(some_mutex);
    void* r = *obj;
    (char*&)(*obj) += operand;
    return r;
  }

  void* __atomic_fetch_sub(void* volatile* obj, ptrdiff_t operand) {
    unique_lock<mutex> _(some_mutex);
    void* r = *obj;
    (char*&)(*obj) -= operand;
    return r;
  }

  void __atomic_thread_fence() {
    unique_lock<mutex> _(some_mutex);
  }

  void __atomic_signal_fence() {
    unique_lock<mutex> _(some_mutex);
  }


Design B: Something in between
==============================
This is a variation of design A which puts the burden on the library to arrange
for the correct manipulation of the run time memory ordering arguments, and only
calls the compiler for well-defined memory orderings. I think of this design as
the worst of A and C, instead of the best of A and C. But I offer it as an
option in the spirit of completeness.

.. code-block:: cpp

  // type must be trivially copyable
  bool __atomic_is_lock_free(const type* atomic_obj);

  // type must be trivially copyable
  type __atomic_load_relaxed(const volatile type* atomic_obj);
  type __atomic_load_consume(const volatile type* atomic_obj);
  type __atomic_load_acquire(const volatile type* atomic_obj);
  type __atomic_load_seq_cst(const volatile type* atomic_obj);

  // type must be trivially copyable
  type __atomic_store_relaxed(volatile type* atomic_obj, type desired);
  type __atomic_store_release(volatile type* atomic_obj, type desired);
  type __atomic_store_seq_cst(volatile type* atomic_obj, type desired);

  // type must be trivially copyable
  type __atomic_exchange_relaxed(volatile type* atomic_obj, type desired);
  type __atomic_exchange_consume(volatile type* atomic_obj, type desired);
  type __atomic_exchange_acquire(volatile type* atomic_obj, type desired);
  type __atomic_exchange_release(volatile type* atomic_obj, type desired);
  type __atomic_exchange_acq_rel(volatile type* atomic_obj, type desired);
  type __atomic_exchange_seq_cst(volatile type* atomic_obj, type desired);

  // type must be trivially copyable
  bool __atomic_compare_exchange_strong_relaxed_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_consume_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_consume_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_acquire_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_acquire_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_acquire_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_release_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_release_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_release_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_acq_rel_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_acq_rel_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_acq_rel_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_seq_cst_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_seq_cst_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_seq_cst_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
  bool __atomic_compare_exchange_strong_seq_cst_seq_cst(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);

  // type must be trivially copyable
  bool __atomic_compare_exchange_weak_relaxed_relaxed(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_consume_relaxed(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_consume_consume(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_acquire_relaxed(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_acquire_consume(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_acquire_acquire(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_release_relaxed(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_release_consume(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_release_acquire(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_acq_rel_relaxed(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_acq_rel_consume(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_acq_rel_acquire(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_seq_cst_relaxed(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_seq_cst_consume(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_seq_cst_acquire(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);
  bool __atomic_compare_exchange_weak_seq_cst_seq_cst(volatile type* atomic_obj,
                                                      type* expected,
                                                      type desired);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  type __atomic_fetch_add_relaxed(volatile type* atomic_obj, type operand);
  type __atomic_fetch_add_consume(volatile type* atomic_obj, type operand);
  type __atomic_fetch_add_acquire(volatile type* atomic_obj, type operand);
  type __atomic_fetch_add_release(volatile type* atomic_obj, type operand);
  type __atomic_fetch_add_acq_rel(volatile type* atomic_obj, type operand);
  type __atomic_fetch_add_seq_cst(volatile type* atomic_obj, type operand);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  type __atomic_fetch_sub_relaxed(volatile type* atomic_obj, type operand);
  type __atomic_fetch_sub_consume(volatile type* atomic_obj, type operand);
  type __atomic_fetch_sub_acquire(volatile type* atomic_obj, type operand);
  type __atomic_fetch_sub_release(volatile type* atomic_obj, type operand);
  type __atomic_fetch_sub_acq_rel(volatile type* atomic_obj, type operand);
  type __atomic_fetch_sub_seq_cst(volatile type* atomic_obj, type operand);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  type __atomic_fetch_and_relaxed(volatile type* atomic_obj, type operand);
  type __atomic_fetch_and_consume(volatile type* atomic_obj, type operand);
  type __atomic_fetch_and_acquire(volatile type* atomic_obj, type operand);
  type __atomic_fetch_and_release(volatile type* atomic_obj, type operand);
  type __atomic_fetch_and_acq_rel(volatile type* atomic_obj, type operand);
  type __atomic_fetch_and_seq_cst(volatile type* atomic_obj, type operand);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  type __atomic_fetch_or_relaxed(volatile type* atomic_obj, type operand);
  type __atomic_fetch_or_consume(volatile type* atomic_obj, type operand);
  type __atomic_fetch_or_acquire(volatile type* atomic_obj, type operand);
  type __atomic_fetch_or_release(volatile type* atomic_obj, type operand);
  type __atomic_fetch_or_acq_rel(volatile type* atomic_obj, type operand);
  type __atomic_fetch_or_seq_cst(volatile type* atomic_obj, type operand);

  // type is one of: char, signed char, unsigned char, short, unsigned short, int,
  //   unsigned int, long, unsigned long, long long, unsigned long long,
  //   char16_t, char32_t, wchar_t
  type __atomic_fetch_xor_relaxed(volatile type* atomic_obj, type operand);
  type __atomic_fetch_xor_consume(volatile type* atomic_obj, type operand);
  type __atomic_fetch_xor_acquire(volatile type* atomic_obj, type operand);
  type __atomic_fetch_xor_release(volatile type* atomic_obj, type operand);
  type __atomic_fetch_xor_acq_rel(volatile type* atomic_obj, type operand);
  type __atomic_fetch_xor_seq_cst(volatile type* atomic_obj, type operand);

  void* __atomic_fetch_add_relaxed(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_add_consume(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_add_acquire(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_add_release(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_add_acq_rel(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_add_seq_cst(void* volatile* atomic_obj, ptrdiff_t operand);

  void* __atomic_fetch_sub_relaxed(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_sub_consume(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_sub_acquire(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_sub_release(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_sub_acq_rel(void* volatile* atomic_obj, ptrdiff_t operand);
  void* __atomic_fetch_sub_seq_cst(void* volatile* atomic_obj, ptrdiff_t operand);

  void __atomic_thread_fence_relaxed();
  void __atomic_thread_fence_consume();
  void __atomic_thread_fence_acquire();
  void __atomic_thread_fence_release();
  void __atomic_thread_fence_acq_rel();
  void __atomic_thread_fence_seq_cst();

  void __atomic_signal_fence_relaxed();
  void __atomic_signal_fence_consume();
  void __atomic_signal_fence_acquire();
  void __atomic_signal_fence_release();
  void __atomic_signal_fence_acq_rel();
  void __atomic_signal_fence_seq_cst();

Design C: Minimal work for the front end
========================================
The ``<atomic>`` header is one of the most closely coupled headers to the compiler.
Ideally when you invoke any function from ``<atomic>``, it should result in highly
optimized assembly being inserted directly into your application -- assembly that
is not otherwise representable by higher level C or C++ expressions. The design of
the libc++ ``<atomic>`` header started with this goal in mind. A secondary, but
still very important goal is that the compiler should have to do minimal work to
facilitate the implementation of ``<atomic>``. Without this second goal, then
practically speaking, the libc++ ``<atomic>`` header would be doomed to be a
barely supported, second class citizen on almost every platform.

Goals:

- Optimal code generation for atomic operations
- Minimal effort for the compiler to achieve goal 1 on any given platform
- Conformance to the C++0X draft standard

The purpose of this document is to inform compiler writers what they need to do
to enable a high performance libc++ ``<atomic>`` with minimal effort.

The minimal work that must be done for a conforming ``<atomic>``
----------------------------------------------------------------
The only "atomic" operations that must actually be lock free in
``<atomic>`` are represented by the following compiler intrinsics:

.. code-block:: cpp

  __atomic_flag__ __atomic_exchange_seq_cst(__atomic_flag__ volatile* obj, __atomic_flag__ desr) {
    unique_lock<mutex> _(some_mutex);
    __atomic_flag__ result = *obj;
    *obj = desr;
    return result;
  }

  void __atomic_store_seq_cst(__atomic_flag__ volatile* obj, __atomic_flag__ desr) {
    unique_lock<mutex> _(some_mutex);
    *obj = desr;
  }

Where:

- If ``__has_feature(__atomic_flag)`` evaluates to 1 in the preprocessor then
  the compiler must define ``__atomic_flag__`` (e.g. as a typedef to ``int``).
- If ``__has_feature(__atomic_flag)`` evaluates to 0 in the preprocessor then
  the library defines ``__atomic_flag__`` as a typedef to ``bool``.
- To communicate that the above intrinsics are available, the compiler must
  arrange for ``__has_feature`` to return 1 when fed the intrinsic name
  appended with an '_' and the mangled type name of ``__atomic_flag__``.

For example if ``__atomic_flag__`` is ``unsigned int``:

.. code-block:: cpp

  // __has_feature(__atomic_flag) == 1
  // __has_feature(__atomic_exchange_seq_cst_j) == 1
  // __has_feature(__atomic_store_seq_cst_j) == 1

  typedef unsigned int __atomic_flag__;

  unsigned int __atomic_exchange_seq_cst(unsigned int volatile*, unsigned int) {
    // ...
  }

  void __atomic_store_seq_cst(unsigned int volatile*, unsigned int) {
    // ...
  }

That's it! Compiler writers do the above and you've got a fully conforming
(though sub-par performance) ``<atomic>`` header!


Recommended work for a higher performance ``<atomic>``
------------------------------------------------------
It would be good if the above intrinsics worked with all integral types plus
``void*``. Because this may not be possible to do in a lock-free manner for
all integral types on all platforms, a compiler must communicate each type that
an intrinsic works with. For example, if ``__atomic_exchange_seq_cst`` works
for all types except for ``long long`` and ``unsigned long long`` then:

.. code-block:: cpp

  __has_feature(__atomic_exchange_seq_cst_b) == 1  // bool
  __has_feature(__atomic_exchange_seq_cst_c) == 1  // char
  __has_feature(__atomic_exchange_seq_cst_a) == 1  // signed char
  __has_feature(__atomic_exchange_seq_cst_h) == 1  // unsigned char
  __has_feature(__atomic_exchange_seq_cst_Ds) == 1 // char16_t
  __has_feature(__atomic_exchange_seq_cst_Di) == 1 // char32_t
  __has_feature(__atomic_exchange_seq_cst_w) == 1  // wchar_t
  __has_feature(__atomic_exchange_seq_cst_s) == 1  // short
  __has_feature(__atomic_exchange_seq_cst_t) == 1  // unsigned short
  __has_feature(__atomic_exchange_seq_cst_i) == 1  // int
  __has_feature(__atomic_exchange_seq_cst_j) == 1  // unsigned int
  __has_feature(__atomic_exchange_seq_cst_l) == 1  // long
  __has_feature(__atomic_exchange_seq_cst_m) == 1  // unsigned long
  __has_feature(__atomic_exchange_seq_cst_Pv) == 1 // void*

Note that only the ``__has_feature`` flag is decorated with the argument
type. The name of the compiler intrinsic is not decorated, but instead works
like a C++ overloaded function.

Additionally, there are other intrinsics besides ``__atomic_exchange_seq_cst``
and ``__atomic_store_seq_cst``. They are optional. But if the compiler can
generate faster code than provided by the library, then clients will benefit
from the compiler writer's expertise and knowledge of the targeted platform.

Below is the complete list of *sequentially consistent* intrinsics, and
their library implementations. Template syntax is used to indicate the desired
overloading for integral and ``void*`` types. The template does not represent a
requirement that the intrinsic operate on **any** type!

.. code-block:: cpp

  // T is one of:
  //   bool, char, signed char, unsigned char, short, unsigned short,
  //   int, unsigned int, long, unsigned long,
  //   long long, unsigned long long, char16_t, char32_t, wchar_t, void*

  template <class T>
  T __atomic_load_seq_cst(T const volatile* obj) {
    unique_lock<mutex> _(some_mutex);
    return *obj;
  }

  template <class T>
  void __atomic_store_seq_cst(T volatile* obj, T desr) {
    unique_lock<mutex> _(some_mutex);
    *obj = desr;
  }

  template <class T>
  T __atomic_exchange_seq_cst(T volatile* obj, T desr) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj = desr;
    return r;
  }

  template <class T>
  bool __atomic_compare_exchange_strong_seq_cst_seq_cst(T volatile* obj, T* exp, T desr) {
    unique_lock<mutex> _(some_mutex);
    if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0) {
      std::memcpy(const_cast<T*>(obj), &desr, sizeof(T));
      return true;
    }
    std::memcpy(exp, const_cast<T*>(obj), sizeof(T));
    return false;
  }

  template <class T>
  bool __atomic_compare_exchange_weak_seq_cst_seq_cst(T volatile* obj, T* exp, T desr) {
    unique_lock<mutex> _(some_mutex);
    if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0)
    {
      std::memcpy(const_cast<T*>(obj), &desr, sizeof(T));
      return true;
    }
    std::memcpy(exp, const_cast<T*>(obj), sizeof(T));
    return false;
  }

  // T is one of:
  //   char, signed char, unsigned char, short, unsigned short,
  //   int, unsigned int, long, unsigned long,
  //   long long, unsigned long long, char16_t, char32_t, wchar_t

  template <class T>
  T __atomic_fetch_add_seq_cst(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj += operand;
    return r;
  }

  template <class T>
  T __atomic_fetch_sub_seq_cst(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj -= operand;
    return r;
  }

  template <class T>
  T __atomic_fetch_and_seq_cst(T volatile* obj, T operand) {
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj &= operand;
    return r;
  }
641*76d0caaeSpatrick template <class T> 642*76d0caaeSpatrick T __atomic_fetch_or_seq_cst(T volatile* obj, T operand) { 643*76d0caaeSpatrick unique_lock<mutex> _(some_mutex); 644*76d0caaeSpatrick T r = *obj; 645*76d0caaeSpatrick *obj |= operand; 646*76d0caaeSpatrick return r; 647*76d0caaeSpatrick } 648*76d0caaeSpatrick 649*76d0caaeSpatrick template <class T> 650*76d0caaeSpatrick T __atomic_fetch_xor_seq_cst(T volatile* obj, T operand) { 651*76d0caaeSpatrick unique_lock<mutex> _(some_mutex); 652*76d0caaeSpatrick T r = *obj; 653*76d0caaeSpatrick *obj ^= operand; 654*76d0caaeSpatrick return r; 655*76d0caaeSpatrick } 656*76d0caaeSpatrick 657*76d0caaeSpatrick void* __atomic_fetch_add_seq_cst(void* volatile* obj, ptrdiff_t operand) { 658*76d0caaeSpatrick unique_lock<mutex> _(some_mutex); 659*76d0caaeSpatrick void* r = *obj; 660*76d0caaeSpatrick (char*&)(*obj) += operand; 661*76d0caaeSpatrick return r; 662*76d0caaeSpatrick } 663*76d0caaeSpatrick 664*76d0caaeSpatrick void* __atomic_fetch_sub_seq_cst(void* volatile* obj, ptrdiff_t operand) { 665*76d0caaeSpatrick unique_lock<mutex> _(some_mutex); 666*76d0caaeSpatrick void* r = *obj; 667*76d0caaeSpatrick (char*&)(*obj) -= operand; 668*76d0caaeSpatrick return r; 669*76d0caaeSpatrick } 670*76d0caaeSpatrick 671*76d0caaeSpatrick void __atomic_thread_fence_seq_cst() { 672*76d0caaeSpatrick unique_lock<mutex> _(some_mutex); 673*76d0caaeSpatrick } 674*76d0caaeSpatrick 675*76d0caaeSpatrick void __atomic_signal_fence_seq_cst() { 676*76d0caaeSpatrick unique_lock<mutex> _(some_mutex); 677*76d0caaeSpatrick } 678*76d0caaeSpatrick 679*76d0caaeSpatrickOne should consult the (currently draft) `C++ Standard <https://wg21.link/n3126>`_ 680*76d0caaeSpatrickfor the details of the definitions for these operations. For example, 681*76d0caaeSpatrick``__atomic_compare_exchange_weak_seq_cst_seq_cst`` is allowed to fail 682*76d0caaeSpatrickspuriously while ``__atomic_compare_exchange_strong_seq_cst_seq_cst`` is not. 
683*76d0caaeSpatrick 684*76d0caaeSpatrickIf on your platform the lock-free definition of ``__atomic_compare_exchange_weak_seq_cst_seq_cst`` 685*76d0caaeSpatrickwould be the same as ``__atomic_compare_exchange_strong_seq_cst_seq_cst``, you may omit the 686*76d0caaeSpatrick``__atomic_compare_exchange_weak_seq_cst_seq_cst`` intrinsic without a performance cost. The 687*76d0caaeSpatricklibrary will prefer your implementation of ``__atomic_compare_exchange_strong_seq_cst_seq_cst`` 688*76d0caaeSpatrickover its own definition for implementing ``__atomic_compare_exchange_weak_seq_cst_seq_cst``. 689*76d0caaeSpatrickThat is, the library will arrange for ``__atomic_compare_exchange_weak_seq_cst_seq_cst`` to call 690*76d0caaeSpatrick``__atomic_compare_exchange_strong_seq_cst_seq_cst`` if you supply an intrinsic for the strong 691*76d0caaeSpatrickversion but not the weak. 692*76d0caaeSpatrick 693*76d0caaeSpatrickTaking advantage of weaker memory synchronization 694*76d0caaeSpatrick------------------------------------------------- 695*76d0caaeSpatrickSo far, all of the intrinsics presented require a **sequentially consistent** memory ordering. 696*76d0caaeSpatrickThat is, no loads or stores can move across the operation (just as if the library had locked 697*76d0caaeSpatrickthat internal mutex). But ``<atomic>`` supports weaker memory ordering operations. In all, 698*76d0caaeSpatrickthere are six memory orderings (listed here from strongest to weakest): 699*76d0caaeSpatrick 700*76d0caaeSpatrick.. code-block:: cpp 701*76d0caaeSpatrick 702*76d0caaeSpatrick memory_order_seq_cst 703*76d0caaeSpatrick memory_order_acq_rel 704*76d0caaeSpatrick memory_order_release 705*76d0caaeSpatrick memory_order_acquire 706*76d0caaeSpatrick memory_order_consume 707*76d0caaeSpatrick memory_order_relaxed 708*76d0caaeSpatrick 709*76d0caaeSpatrick(See the `C++ Standard <https://wg21.link/n3126>`_ for the detailed definitions of each of these orderings). 
710*76d0caaeSpatrick 711*76d0caaeSpatrickOn some platforms, the compiler vendor can offer some or even all of the above 712*76d0caaeSpatrickintrinsics at one or more weaker levels of memory synchronization. This might 713*76d0caaeSpatricklead for example to not issuing an ``mfence`` instruction on the x86. 714*76d0caaeSpatrick 715*76d0caaeSpatrickIf the compiler does not offer any given operation, at any given memory ordering 716*76d0caaeSpatricklevel, the library will automatically attempt to call the next highest memory 717*76d0caaeSpatrickordering operation. This continues up to ``seq_cst``, and if that doesn't 718*76d0caaeSpatrickexist, then the library takes over and does the job with a ``mutex``. This 719*76d0caaeSpatrickis a compile-time search and selection operation. At run time, the application 720*76d0caaeSpatrickwill only see the few inlined assembly instructions for the selected intrinsic. 721*76d0caaeSpatrick 722*76d0caaeSpatrickEach intrinsic is appended with the 7-letter name of the memory ordering it 723*76d0caaeSpatrickaddresses. For example a ``load`` with ``relaxed`` ordering is defined by: 724*76d0caaeSpatrick 725*76d0caaeSpatrick.. code-block:: cpp 726*76d0caaeSpatrick 727*76d0caaeSpatrick T __atomic_load_relaxed(const volatile T* obj); 728*76d0caaeSpatrick 729*76d0caaeSpatrickAnd announced with: 730*76d0caaeSpatrick 731*76d0caaeSpatrick.. code-block:: cpp 732*76d0caaeSpatrick 733*76d0caaeSpatrick __has_feature(__atomic_load_relaxed_b) == 1 // bool 734*76d0caaeSpatrick __has_feature(__atomic_load_relaxed_c) == 1 // char 735*76d0caaeSpatrick __has_feature(__atomic_load_relaxed_a) == 1 // signed char 736*76d0caaeSpatrick ... 737*76d0caaeSpatrick 738*76d0caaeSpatrickThe ``__atomic_compare_exchange_strong(weak)`` intrinsics are parameterized 739*76d0caaeSpatrickon two memory orderings. The first ordering applies when the operation returns 740*76d0caaeSpatrick``true`` and the second ordering applies when the operation returns ``false``. 
741*76d0caaeSpatrick 742*76d0caaeSpatrickNot every memory ordering is appropriate for every operation. ``exchange`` 743*76d0caaeSpatrickand the ``fetch_XXX`` operations support all 6. But ``load`` only supports 744*76d0caaeSpatrick``relaxed``, ``consume``, ``acquire`` and ``seq_cst``. ``store`` only supports 745*76d0caaeSpatrick``relaxed``, ``release``, and ``seq_cst``. The ``compare_exchange`` operations 746*76d0caaeSpatricksupport the following 16 combinations out of the possible 36: 747*76d0caaeSpatrick 748*76d0caaeSpatrick.. code-block:: cpp 749*76d0caaeSpatrick 750*76d0caaeSpatrick relaxed_relaxed 751*76d0caaeSpatrick consume_relaxed 752*76d0caaeSpatrick consume_consume 753*76d0caaeSpatrick acquire_relaxed 754*76d0caaeSpatrick acquire_consume 755*76d0caaeSpatrick acquire_acquire 756*76d0caaeSpatrick release_relaxed 757*76d0caaeSpatrick release_consume 758*76d0caaeSpatrick release_acquire 759*76d0caaeSpatrick acq_rel_relaxed 760*76d0caaeSpatrick acq_rel_consume 761*76d0caaeSpatrick acq_rel_acquire 762*76d0caaeSpatrick seq_cst_relaxed 763*76d0caaeSpatrick seq_cst_consume 764*76d0caaeSpatrick seq_cst_acquire 765*76d0caaeSpatrick seq_cst_seq_cst 766*76d0caaeSpatrick 767*76d0caaeSpatrickAgain, the compiler supplies intrinsics only for the strongest orderings where 768*76d0caaeSpatrickit can make a difference. The library takes care of calling the weakest 769*76d0caaeSpatricksupplied intrinsic that is as strong or stronger than the customer asked for. 770*76d0caaeSpatrick 771*76d0caaeSpatrickNote about ABI 772*76d0caaeSpatrick============== 773*76d0caaeSpatrickWith any design, the (back end) compiler writer should note that the decision to 774*76d0caaeSpatrickimplement lock-free operations on any given type (or not) is an ABI-binding decision. 775*76d0caaeSpatrickOne can not change from treating a type as not lock free, to lock free (or vice-versa) 776*76d0caaeSpatrickwithout breaking your ABI. 
777*76d0caaeSpatrick
778*76d0caaeSpatrickFor example:
779*76d0caaeSpatrick
780*76d0caaeSpatrick**TU1.cpp**:
781*76d0caaeSpatrick
782*76d0caaeSpatrick.. code-block:: cpp
783*76d0caaeSpatrick
784*76d0caaeSpatrick extern atomic<long long> A;
785*76d0caaeSpatrick int foo() { return A.compare_exchange_strong(w, x); }
786*76d0caaeSpatrick
787*76d0caaeSpatrick
788*76d0caaeSpatrick**TU2.cpp**:
789*76d0caaeSpatrick
790*76d0caaeSpatrick.. code-block:: cpp
791*76d0caaeSpatrick
792*76d0caaeSpatrick extern atomic<long long> A;
793*76d0caaeSpatrick void bar() { A.compare_exchange_strong(y, z); }
794*76d0caaeSpatrick
795*76d0caaeSpatrickIf only **one** of these calls to ``compare_exchange_strong`` is implemented with
796*76d0caaeSpatrickmutex-locked code, then that mutex-locked code will not be executed mutually
797*76d0caaeSpatrickexclusively of the one implemented in a lock-free manner.
798