#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
# elif defined(__ppc__)
	asm volatile("lwsync");
# elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
# else
	__sync_synchronize();
# endif
	asm volatile("" ::: "memory");
}

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 * 1. store() is weak-fence -> store -> strong-fence, load() is load ->
 *    strong-fence.
 * 2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *    weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a
 * strong fence after seq_cst stores, and have naked loads.  So we want the
 * strong fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */
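
/*
 * Illustrative sketch, not part of this header, of what the two helpers
 * below amount to.  It assumes an atomic_u32_t as generated by the macros
 * further down.  On a weakly ordered architecture (strategy 2), a seq_cst
 * load and a seq_cst store of a->repr boil down to roughly:
 *
 *	atomic_fence(atomic_memory_order_seq_cst);	// pre-load strong fence
 *	uint32_t v = a->repr;
 *	atomic_fence(atomic_memory_order_acquire);	// weak trailing fence
 *
 *	atomic_fence(atomic_memory_order_release);	// weak leading fence
 *	a->repr = v;
 *	// post-store fence is only a compiler barrier here
 *
 * whereas on x86 and 64-bit SPARC the strong fence instead trails the
 * seq_cst store, and the seq_cst load needs no leading fence.
 */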

ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
# else
	atomic_fence(atomic_memory_order_seq_cst);
# endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
# else
	atomic_fence(atomic_memory_order_relaxed);
# endif
}

#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
typedef struct { \
	type volatile repr; \
} atomic_##short_type##_t; \
 \
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
    atomic_memory_order_t mo) { \
	if (mo == atomic_memory_order_seq_cst) { \
		atomic_pre_sc_load_fence(); \
	} \
	type result = a->repr; \
	if (mo != atomic_memory_order_relaxed) { \
		atomic_fence(atomic_memory_order_acquire); \
	} \
	return result; \
} \
 \
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, \
    type val, atomic_memory_order_t mo) { \
	if (mo != atomic_memory_order_relaxed) { \
		atomic_fence(atomic_memory_order_release); \
	} \
	a->repr = val; \
	if (mo == atomic_memory_order_seq_cst) { \
		atomic_post_sc_store_fence(); \
	} \
} \
 \
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	/* \
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have \
	 * an atomic exchange builtin.  We fake it with a CAS loop. \
	 */ \
	while (true) { \
		type old = a->repr; \
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
			return old; \
		} \
	} \
} \
 \
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
	    desired); \
	if (prev == *expected) { \
		return true; \
	} else { \
		*expected = prev; \
		return false; \
	} \
} \
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
	    desired); \
	if (prev == *expected) { \
		return true; \
	} else { \
		*expected = prev; \
		return false; \
	} \
}
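
/*
 * Illustrative usage sketch, not part of this header.  It assumes an
 * expansion such as JEMALLOC_GENERATE_ATOMICS(void *, p, 3) elsewhere in
 * jemalloc, and "self" stands for any non-NULL pointer the caller wants to
 * install:
 *
 *	atomic_p_t owner = ATOMIC_INIT(NULL);
 *	void *expected = NULL;
 *	if (atomic_compare_exchange_strong_p(&owner, &expected, self,
 *	    atomic_memory_order_acq_rel, atomic_memory_order_acquire)) {
 *		// Success: self was installed; expected is still NULL.
 *	} else {
 *		// Failure: expected now holds the value the CAS observed.
 *	}
 *
 * With this backend both memory-order arguments are effectively seq_cst:
 * __sync_val_compare_and_swap is documented as a full barrier, so
 * success_mo/failure_mo exist only for interface compatibility.
 */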

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
 \
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_add(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_sub(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_and(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_or(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_xor(&a->repr, val); \
}

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
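
/*
 * Illustrative usage sketch, not part of this header.  It assumes an
 * expansion such as JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
 * elsewhere in jemalloc.  The fetch-and-modify operations return the value
 * held before the update, and since they map directly onto the __sync
 * builtins (full barriers), the mo argument is accepted for interface
 * compatibility but does not weaken the ordering:
 *
 *	atomic_u32_t counter = ATOMIC_INIT(0);
 *	uint32_t old = atomic_fetch_add_u32(&counter, 1,
 *	    atomic_memory_order_relaxed);		// old == 0
 *	uint32_t cur = atomic_load_u32(&counter,
 *	    atomic_memory_order_acquire);		// cur == 1
 */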