/* netbsd-src: external/bsd/jemalloc.old/include/jemalloc/internal/mutex.h */
#ifndef JEMALLOC_INTERNAL_MUTEX_H
#define JEMALLOC_INTERNAL_MUTEX_H

#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex_prof.h"
#include "jemalloc/internal/tsd.h"
#include "jemalloc/internal/witness.h"

typedef enum {
	/* Can only acquire one mutex of a given witness rank at a time. */
	malloc_mutex_rank_exclusive,
	/*
	 * Can acquire multiple mutexes of the same witness rank, but in
	 * address-ascending order only.
	 */
	malloc_mutex_address_ordered
} malloc_mutex_lock_order_t;
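
/*
 * Illustration (editor's sketch, not part of the original header): two
 * mutexes sharing a witness rank that was initialized with
 * malloc_mutex_address_ordered must be acquired lower address first:
 *
 *	malloc_mutex_t *lo = (uintptr_t)m1 < (uintptr_t)m2 ? m1 : m2;
 *	malloc_mutex_t *hi = (lo == m1) ? m2 : m1;
 *	malloc_mutex_lock(tsdn, lo);
 *	malloc_mutex_lock(tsdn, hi);	(OK: address-ascending order.)
 */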

typedef struct malloc_mutex_s malloc_mutex_t;
struct malloc_mutex_s {
	union {
		struct {
			/*
			 * prof_data is defined first to reduce cacheline
			 * bouncing: the data is not touched by the mutex
			 * holder during unlocking, but may be modified by
			 * contenders.  Placing it before the mutex itself
			 * can avoid prefetching a modified cacheline (for
			 * the unlocking thread).
			 */
			mutex_prof_data_t	prof_data;
#ifdef _WIN32
#  if _WIN32_WINNT >= 0x0600
			SRWLOCK			lock;
#  else
			CRITICAL_SECTION	lock;
#  endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
			os_unfair_lock		lock;
#elif (defined(JEMALLOC_OSSPIN))
			OSSpinLock		lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
			pthread_mutex_t		lock;
			malloc_mutex_t		*postponed_next;
#else
			pthread_mutex_t		lock;
#endif
		};
		/*
		 * We only touch the witness when configured with debug.
		 * However, we keep the field in a union when !debug so that
		 * we don't have to pollute the code base with #ifdefs, while
		 * avoiding the memory cost.
		 */
#if !defined(JEMALLOC_DEBUG)
		witness_t			witness;
		malloc_mutex_lock_order_t	lock_order;
#endif
	};

#if defined(JEMALLOC_DEBUG)
	witness_t			witness;
	malloc_mutex_lock_order_t	lock_order;
#define LOCK_ORDER_INITIALIZER(field, a)	field = a,
#else
#define LOCK_ORDER_INITIALIZER(field, a)
#endif
};

/*
 * Based on benchmark results, a fixed spin with this number of retries works
 * well for our critical sections.
 */
#define MALLOC_MUTEX_MAX_SPIN 250

#ifdef _WIN32
#  if _WIN32_WINNT >= 0x0600
#    define MALLOC_MUTEX_LOCK(m)    AcquireSRWLockExclusive(&(m)->lock)
#    define MALLOC_MUTEX_UNLOCK(m)  ReleaseSRWLockExclusive(&(m)->lock)
#    define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock))
#  else
#    define MALLOC_MUTEX_LOCK(m)    EnterCriticalSection(&(m)->lock)
#    define MALLOC_MUTEX_UNLOCK(m)  LeaveCriticalSection(&(m)->lock)
#    define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock))
#  endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
#    define MALLOC_MUTEX_LOCK(m)    os_unfair_lock_lock(&(m)->lock)
#    define MALLOC_MUTEX_UNLOCK(m)  os_unfair_lock_unlock(&(m)->lock)
#    define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
#elif (defined(JEMALLOC_OSSPIN))
#    define MALLOC_MUTEX_LOCK(m)    OSSpinLockLock(&(m)->lock)
#    define MALLOC_MUTEX_UNLOCK(m)  OSSpinLockUnlock(&(m)->lock)
#    define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock))
#else
#    define MALLOC_MUTEX_LOCK(m)    pthread_mutex_lock(&(m)->lock)
#    define MALLOC_MUTEX_UNLOCK(m)  pthread_mutex_unlock(&(m)->lock)
#    define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0)
#endif
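
/*
 * Note (editor's): the per-platform trylock wrappers above are normalized
 * so that MALLOC_MUTEX_TRYLOCK() evaluates to true on FAILURE to acquire,
 * matching pthread_mutex_trylock()'s nonzero-on-failure convention.
 */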

#ifdef _WIN32
#  define MALLOC_MUTEX_INITIALIZER
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
#  define MALLOC_MUTEX_INITIALIZER {{{					\
	.prof_data = MUTEX_PROF_DATA_INITIALIZER,			\
	.lock = OS_UNFAIR_LOCK_INIT,					\
	}},								\
	WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT)	\
	LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)}
#elif (defined(JEMALLOC_OSSPIN))
#  define MALLOC_MUTEX_INITIALIZER {{{					\
	.prof_data = MUTEX_PROF_DATA_INITIALIZER,			\
	.lock = 0,							\
	}},								\
	WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT)	\
	LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)}
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
#  define MALLOC_MUTEX_INITIALIZER {{{					\
	.prof_data = MUTEX_PROF_DATA_INITIALIZER,			\
	.lock = PTHREAD_MUTEX_INITIALIZER,				\
	.postponed_next = NULL,						\
	}},								\
	WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT)	\
	LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)}
#else
#  define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
#  define MALLOC_MUTEX_INITIALIZER {{{					\
	.prof_data = MUTEX_PROF_DATA_INITIALIZER,			\
	.lock = PTHREAD_MUTEX_INITIALIZER,				\
	}},								\
	WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT)	\
	LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)}
#endif
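
/*
 * Usage note (editor's): on the pthreads configurations above, a mutex with
 * static storage duration can be initialized at compile time:
 *
 *	static malloc_mutex_t example_lock = MALLOC_MUTEX_INITIALIZER;
 *
 * On Windows the initializer is empty, so such mutexes must instead be
 * initialized at run time via malloc_mutex_init().
 */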

#ifdef JEMALLOC_LAZY_LOCK
extern bool isthreaded;
#else
#  undef isthreaded /* Undo private_namespace.h definition. */
#  define isthreaded true
#endif

bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
    witness_rank_t rank, malloc_mutex_lock_order_t lock_order);
void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex);
bool malloc_mutex_boot(void);
void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex);

void malloc_mutex_lock_slow(malloc_mutex_t *mutex);

static inline void
malloc_mutex_lock_final(malloc_mutex_t *mutex) {
	MALLOC_MUTEX_LOCK(mutex);
}

static inline bool
malloc_mutex_trylock_final(malloc_mutex_t *mutex) {
	return MALLOC_MUTEX_TRYLOCK(mutex);
}

static inline void
mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) {
	if (config_stats) {
		mutex_prof_data_t *data = &mutex->prof_data;
		data->n_lock_ops++;
		if (data->prev_owner != tsdn) {
			data->prev_owner = tsdn;
			data->n_owner_switches++;
		}
	}
}

/* Trylock: return false if the lock is successfully acquired. */
static inline bool
malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
	witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
	if (isthreaded) {
		if (malloc_mutex_trylock_final(mutex)) {
			return true;
		}
		mutex_owner_stats_update(tsdn, mutex);
	}
	witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);

	return false;
}
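
/*
 * Usage sketch (editor's illustration; variable names are hypothetical):
 * note the inverted convention above -- malloc_mutex_trylock() returns
 * false when the lock WAS acquired.
 *
 *	if (malloc_mutex_trylock(tsdn, mtx)) {
 *		return;			(Contended; caller falls back.)
 *	}
 *	counter++;			(Critical section.)
 *	malloc_mutex_unlock(tsdn, mtx);
 */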

/* Aggregate lock prof data. */
static inline void
malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) {
	nstime_add(&sum->tot_wait_time, &data->tot_wait_time);
	if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) {
		nstime_copy(&sum->max_wait_time, &data->max_wait_time);
	}

	sum->n_wait_times += data->n_wait_times;
	sum->n_spin_acquired += data->n_spin_acquired;

	if (sum->max_n_thds < data->max_n_thds) {
		sum->max_n_thds = data->max_n_thds;
	}
	uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds,
	    ATOMIC_RELAXED);
	uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32(
	    &data->n_waiting_thds, ATOMIC_RELAXED);
	atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds,
	    ATOMIC_RELAXED);
	sum->n_owner_switches += data->n_owner_switches;
	sum->n_lock_ops += data->n_lock_ops;
}

static inline void
malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
	witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
	if (isthreaded) {
		if (malloc_mutex_trylock_final(mutex)) {
			malloc_mutex_lock_slow(mutex);
		}
		mutex_owner_stats_update(tsdn, mutex);
	}
	witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
}

static inline void
malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
	witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
	if (isthreaded) {
		MALLOC_MUTEX_UNLOCK(mutex);
	}
}
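
/*
 * Usage sketch (editor's illustration; example_locked_incr is hypothetical,
 * not part of jemalloc): the intended lock/unlock pattern around a critical
 * section, assuming the mutex was set up with malloc_mutex_init().
 */
static inline void
example_locked_incr(tsdn_t *tsdn, malloc_mutex_t *mtx, uint64_t *counter) {
	malloc_mutex_lock(tsdn, mtx);	/* Trylock, then spin, then block. */
	(*counter)++;			/* Critical section. */
	malloc_mutex_unlock(tsdn, mtx);
}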

static inline void
malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
	witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
}

static inline void
malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
	witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
}
/* Copy the prof data from mutex for processing. */
static inline void
malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
    malloc_mutex_t *mutex) {
	mutex_prof_data_t *source = &mutex->prof_data;
	/* Can only read while holding the mutex. */
	malloc_mutex_assert_owner(tsdn, mutex);

	/*
	 * Not *really* allowed (we shouldn't be doing non-atomic loads of
	 * atomic data), but the mutex protection makes this safe, and writing
	 * a member-for-member copy is tedious for this situation.
	 */
	*data = *source;
	/* n_waiting_thds is not reported (modified w/o locking). */
	atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
}
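
/*
 * Example (editor's sketch; example_prof_accumulate is hypothetical, not
 * part of jemalloc): snapshot one mutex's counters under its lock and fold
 * them into a running total, combining prof_read and prof_merge.
 */
static inline void
example_prof_accumulate(tsdn_t *tsdn, mutex_prof_data_t *sum,
    malloc_mutex_t *mutex) {
	mutex_prof_data_t snap;

	malloc_mutex_lock(tsdn, mutex);
	malloc_mutex_prof_read(tsdn, &snap, mutex);	/* Read under lock. */
	malloc_mutex_unlock(tsdn, mutex);
	malloc_mutex_prof_merge(sum, &snap);		/* Fold into total. */
}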

#endif /* JEMALLOC_INTERNAL_MUTEX_H */