#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
#  if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
#  elif defined(__ppc__)
	asm volatile("lwsync");
#  elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
#  else
	__sync_synchronize();
#  endif
	asm volatile("" ::: "memory");
}

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong-fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */

ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
#  else
	atomic_fence(atomic_memory_order_seq_cst);
#  endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
#  else
	atomic_fence(atomic_memory_order_relaxed);
#  endif
}
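
/*
 * Sketch (illustrative only, not compiled in) of the seq_cst sequences the
 * comment above describes, written in terms of the helpers defined here.
 * The load/store generators below emit exactly these shapes when
 * mo == atomic_memory_order_seq_cst.
 */
#if 0
ATOMIC_INLINE void
seq_cst_store_sketch(unsigned volatile *p, unsigned val) {
	atomic_fence(atomic_memory_order_release);	/* fence before the store */
	*p = val;
	atomic_post_sc_store_fence();	/* strong on x86/sparc64, compiler-only elsewhere */
}

ATOMIC_INLINE unsigned
seq_cst_load_sketch(unsigned volatile *p) {
	atomic_pre_sc_load_fence();	/* strong on weakly ordered targets */
	unsigned result = *p;
	atomic_fence(atomic_memory_order_acquire);	/* fence after the load */
	return result;
}
#endif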

#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}
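
/*
 * Usage sketch (illustrative only; the real instantiations are performed by
 * the headers that include this one).  Expanding the generator for bool, for
 * example, yields atomic_b_t together with atomic_load_b(), atomic_store_b(),
 * atomic_exchange_b() and the two compare-exchange variants.
 */
#if 0
JEMALLOC_GENERATE_ATOMICS(bool, b, 0)

ATOMIC_INLINE bool
atomic_flag_test_and_set_sketch(atomic_b_t *flag) {
	/* Returns the previous value; the __sync CAS provides a full barrier. */
	return atomic_exchange_b(flag, true, atomic_memory_order_acq_rel);
}
#endif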

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,		\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}
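
/*
 * Usage sketch (illustrative only): the integer generator additionally
 * provides fetch-and-modify operations.  The __sync builtins used above are
 * full barriers, so the mo argument is effectively ignored for these.
 */
#if 0
JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, 2)	/* lg_size is unused here */

ATOMIC_INLINE unsigned
counter_inc_sketch(atomic_u_t *counter) {
	/* Returns the pre-increment value. */
	return atomic_fetch_add_u(counter, 1, atomic_memory_order_relaxed);
}
#endif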

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */