1*8e33eff8Schristos #ifndef JEMALLOC_INTERNAL_MUTEX_H 2*8e33eff8Schristos #define JEMALLOC_INTERNAL_MUTEX_H 3*8e33eff8Schristos 4*8e33eff8Schristos #include "jemalloc/internal/atomic.h" 5*8e33eff8Schristos #include "jemalloc/internal/mutex_prof.h" 6*8e33eff8Schristos #include "jemalloc/internal/tsd.h" 7*8e33eff8Schristos #include "jemalloc/internal/witness.h" 8*8e33eff8Schristos 9*8e33eff8Schristos typedef enum { 10*8e33eff8Schristos /* Can only acquire one mutex of a given witness rank at a time. */ 11*8e33eff8Schristos malloc_mutex_rank_exclusive, 12*8e33eff8Schristos /* 13*8e33eff8Schristos * Can acquire multiple mutexes of the same witness rank, but in 14*8e33eff8Schristos * address-ascending order only. 15*8e33eff8Schristos */ 16*8e33eff8Schristos malloc_mutex_address_ordered 17*8e33eff8Schristos } malloc_mutex_lock_order_t; 18*8e33eff8Schristos 19*8e33eff8Schristos typedef struct malloc_mutex_s malloc_mutex_t; 20*8e33eff8Schristos struct malloc_mutex_s { 21*8e33eff8Schristos union { 22*8e33eff8Schristos struct { 23*8e33eff8Schristos /* 24*8e33eff8Schristos * prof_data is defined first to reduce cacheline 25*8e33eff8Schristos * bouncing: the data is not touched by the mutex holder 26*8e33eff8Schristos * during unlocking, while might be modified by 27*8e33eff8Schristos * contenders. Having it before the mutex itself could 28*8e33eff8Schristos * avoid prefetching a modified cacheline (for the 29*8e33eff8Schristos * unlocking thread). 30*8e33eff8Schristos */ 31*8e33eff8Schristos mutex_prof_data_t prof_data; 32*8e33eff8Schristos #ifdef _WIN32 33*8e33eff8Schristos # if _WIN32_WINNT >= 0x0600 34*8e33eff8Schristos SRWLOCK lock; 35*8e33eff8Schristos # else 36*8e33eff8Schristos CRITICAL_SECTION lock; 37*8e33eff8Schristos # endif 38*8e33eff8Schristos #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) 39*8e33eff8Schristos os_unfair_lock lock; 40*8e33eff8Schristos #elif (defined(JEMALLOC_OSSPIN)) 41*8e33eff8Schristos OSSpinLock lock; 42*8e33eff8Schristos #elif (defined(JEMALLOC_MUTEX_INIT_CB)) 43*8e33eff8Schristos pthread_mutex_t lock; 44*8e33eff8Schristos malloc_mutex_t *postponed_next; 45*8e33eff8Schristos #else 46*8e33eff8Schristos pthread_mutex_t lock; 47*8e33eff8Schristos #endif 48*8e33eff8Schristos }; 49*8e33eff8Schristos /* 50*8e33eff8Schristos * We only touch witness when configured w/ debug. However we 51*8e33eff8Schristos * keep the field in a union when !debug so that we don't have 52*8e33eff8Schristos * to pollute the code base with #ifdefs, while avoid paying the 53*8e33eff8Schristos * memory cost. 54*8e33eff8Schristos */ 55*8e33eff8Schristos #if !defined(JEMALLOC_DEBUG) 56*8e33eff8Schristos witness_t witness; 57*8e33eff8Schristos malloc_mutex_lock_order_t lock_order; 58*8e33eff8Schristos #endif 59*8e33eff8Schristos }; 60*8e33eff8Schristos 61*8e33eff8Schristos #if defined(JEMALLOC_DEBUG) 62*8e33eff8Schristos witness_t witness; 63*8e33eff8Schristos malloc_mutex_lock_order_t lock_order; 64*8e33eff8Schristos #define LOCK_ORDER_INITIALIZER(field, a) field = a, 65*8e33eff8Schristos #else 66*8e33eff8Schristos #define LOCK_ORDER_INITIALIZER(field, a) 67*8e33eff8Schristos #endif 68*8e33eff8Schristos }; 69*8e33eff8Schristos 70*8e33eff8Schristos /* 71*8e33eff8Schristos * Based on benchmark results, a fixed spin with this amount of retries works 72*8e33eff8Schristos * well for our critical sections. 73*8e33eff8Schristos */ 74*8e33eff8Schristos #define MALLOC_MUTEX_MAX_SPIN 250 75*8e33eff8Schristos 76*8e33eff8Schristos #ifdef _WIN32 77*8e33eff8Schristos # if _WIN32_WINNT >= 0x0600 78*8e33eff8Schristos # define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) 79*8e33eff8Schristos # define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) 80*8e33eff8Schristos # define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) 81*8e33eff8Schristos # else 82*8e33eff8Schristos # define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) 83*8e33eff8Schristos # define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) 84*8e33eff8Schristos # define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) 85*8e33eff8Schristos # endif 86*8e33eff8Schristos #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) 87*8e33eff8Schristos # define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) 88*8e33eff8Schristos # define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) 89*8e33eff8Schristos # define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) 90*8e33eff8Schristos #elif (defined(JEMALLOC_OSSPIN)) 91*8e33eff8Schristos # define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock) 92*8e33eff8Schristos # define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) 93*8e33eff8Schristos # define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) 94*8e33eff8Schristos #else 95*8e33eff8Schristos # define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) 96*8e33eff8Schristos # define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) 97*8e33eff8Schristos # define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) 98*8e33eff8Schristos #endif 99*8e33eff8Schristos 100*8e33eff8Schristos #ifdef _WIN32 101*8e33eff8Schristos # define MALLOC_MUTEX_INITIALIZER 102*8e33eff8Schristos #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) 103*8e33eff8Schristos # define MALLOC_MUTEX_INITIALIZER {{{ \ 104*8e33eff8Schristos .prof_data = MUTEX_PROF_DATA_INITIALIZER, \ 105*8e33eff8Schristos .lock = OS_UNFAIR_LOCK_INIT, \ 106*8e33eff8Schristos }}, \ 107*8e33eff8Schristos WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT) \ 108*8e33eff8Schristos LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)} 109*8e33eff8Schristos #elif (defined(JEMALLOC_OSSPIN)) 110*8e33eff8Schristos # define MALLOC_MUTEX_INITIALIZER {{{ \ 111*8e33eff8Schristos .prof_data = MUTEX_PROF_DATA_INITIALIZER, \ 112*8e33eff8Schristos .lock = 0, \ 113*8e33eff8Schristos }}, \ 114*8e33eff8Schristos WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT) \ 115*8e33eff8Schristos LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)} 116*8e33eff8Schristos #elif (defined(JEMALLOC_MUTEX_INIT_CB)) 117*8e33eff8Schristos # define MALLOC_MUTEX_INITIALIZER {{{ \ 118*8e33eff8Schristos .prof_data = MUTEX_PROF_DATA_INITIALIZER, \ 119*8e33eff8Schristos .lock = PTHREAD_MUTEX_INITIALIZER, \ 120*8e33eff8Schristos .postponed_next = NULL, \ 121*8e33eff8Schristos }}, \ 122*8e33eff8Schristos WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT) \ 123*8e33eff8Schristos LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)} 124*8e33eff8Schristos #else 125*8e33eff8Schristos # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT 126*8e33eff8Schristos # define MALLOC_MUTEX_INITIALIZER {{{ \ 127*8e33eff8Schristos .prof_data = MUTEX_PROF_DATA_INITIALIZER, \ 128*8e33eff8Schristos .lock = PTHREAD_MUTEX_INITIALIZER, \ 129*8e33eff8Schristos }}, \ 130*8e33eff8Schristos WITNESS_INITIALIZER(.witness, "mutex", WITNESS_RANK_OMIT) \ 131*8e33eff8Schristos LOCK_ORDER_INITIALIZER(.lock_order, malloc_mutex_rank_exclusive)} 132*8e33eff8Schristos #endif 133*8e33eff8Schristos 134*8e33eff8Schristos #ifdef JEMALLOC_LAZY_LOCK 135*8e33eff8Schristos extern bool isthreaded; 136*8e33eff8Schristos #else 137*8e33eff8Schristos # undef isthreaded /* Undo private_namespace.h definition. */ 138*8e33eff8Schristos # define isthreaded true 139*8e33eff8Schristos #endif 140*8e33eff8Schristos 141*8e33eff8Schristos bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, 142*8e33eff8Schristos witness_rank_t rank, malloc_mutex_lock_order_t lock_order); 143*8e33eff8Schristos void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); 144*8e33eff8Schristos void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); 145*8e33eff8Schristos void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); 146*8e33eff8Schristos bool malloc_mutex_boot(void); 147*8e33eff8Schristos void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); 148*8e33eff8Schristos 149*8e33eff8Schristos void malloc_mutex_lock_slow(malloc_mutex_t *mutex); 150*8e33eff8Schristos 151*8e33eff8Schristos static inline void 152*8e33eff8Schristos malloc_mutex_lock_final(malloc_mutex_t *mutex) { 153*8e33eff8Schristos MALLOC_MUTEX_LOCK(mutex); 154*8e33eff8Schristos } 155*8e33eff8Schristos 156*8e33eff8Schristos static inline bool 157*8e33eff8Schristos malloc_mutex_trylock_final(malloc_mutex_t *mutex) { 158*8e33eff8Schristos return MALLOC_MUTEX_TRYLOCK(mutex); 159*8e33eff8Schristos } 160*8e33eff8Schristos 161*8e33eff8Schristos static inline void 162*8e33eff8Schristos mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { 163*8e33eff8Schristos if (config_stats) { 164*8e33eff8Schristos mutex_prof_data_t *data = &mutex->prof_data; 165*8e33eff8Schristos data->n_lock_ops++; 166*8e33eff8Schristos if (data->prev_owner != tsdn) { 167*8e33eff8Schristos data->prev_owner = tsdn; 168*8e33eff8Schristos data->n_owner_switches++; 169*8e33eff8Schristos } 170*8e33eff8Schristos } 171*8e33eff8Schristos } 172*8e33eff8Schristos 173*8e33eff8Schristos /* Trylock: return false if the lock is successfully acquired. */ 174*8e33eff8Schristos static inline bool 175*8e33eff8Schristos malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { 176*8e33eff8Schristos witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); 177*8e33eff8Schristos if (isthreaded) { 178*8e33eff8Schristos if (malloc_mutex_trylock_final(mutex)) { 179*8e33eff8Schristos return true; 180*8e33eff8Schristos } 181*8e33eff8Schristos mutex_owner_stats_update(tsdn, mutex); 182*8e33eff8Schristos } 183*8e33eff8Schristos witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); 184*8e33eff8Schristos 185*8e33eff8Schristos return false; 186*8e33eff8Schristos } 187*8e33eff8Schristos 188*8e33eff8Schristos /* Aggregate lock prof data. */ 189*8e33eff8Schristos static inline void 190*8e33eff8Schristos malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { 191*8e33eff8Schristos nstime_add(&sum->tot_wait_time, &data->tot_wait_time); 192*8e33eff8Schristos if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) { 193*8e33eff8Schristos nstime_copy(&sum->max_wait_time, &data->max_wait_time); 194*8e33eff8Schristos } 195*8e33eff8Schristos 196*8e33eff8Schristos sum->n_wait_times += data->n_wait_times; 197*8e33eff8Schristos sum->n_spin_acquired += data->n_spin_acquired; 198*8e33eff8Schristos 199*8e33eff8Schristos if (sum->max_n_thds < data->max_n_thds) { 200*8e33eff8Schristos sum->max_n_thds = data->max_n_thds; 201*8e33eff8Schristos } 202*8e33eff8Schristos uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, 203*8e33eff8Schristos ATOMIC_RELAXED); 204*8e33eff8Schristos uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( 205*8e33eff8Schristos &data->n_waiting_thds, ATOMIC_RELAXED); 206*8e33eff8Schristos atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, 207*8e33eff8Schristos ATOMIC_RELAXED); 208*8e33eff8Schristos sum->n_owner_switches += data->n_owner_switches; 209*8e33eff8Schristos sum->n_lock_ops += data->n_lock_ops; 210*8e33eff8Schristos } 211*8e33eff8Schristos 212*8e33eff8Schristos static inline void 213*8e33eff8Schristos malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { 214*8e33eff8Schristos witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); 215*8e33eff8Schristos if (isthreaded) { 216*8e33eff8Schristos if (malloc_mutex_trylock_final(mutex)) { 217*8e33eff8Schristos malloc_mutex_lock_slow(mutex); 218*8e33eff8Schristos } 219*8e33eff8Schristos mutex_owner_stats_update(tsdn, mutex); 220*8e33eff8Schristos } 221*8e33eff8Schristos witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); 222*8e33eff8Schristos } 223*8e33eff8Schristos 224*8e33eff8Schristos static inline void 225*8e33eff8Schristos malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { 226*8e33eff8Schristos witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); 227*8e33eff8Schristos if (isthreaded) { 228*8e33eff8Schristos MALLOC_MUTEX_UNLOCK(mutex); 229*8e33eff8Schristos } 230*8e33eff8Schristos } 231*8e33eff8Schristos 232*8e33eff8Schristos static inline void 233*8e33eff8Schristos malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { 234*8e33eff8Schristos witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); 235*8e33eff8Schristos } 236*8e33eff8Schristos 237*8e33eff8Schristos static inline void 238*8e33eff8Schristos malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { 239*8e33eff8Schristos witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); 240*8e33eff8Schristos } 241*8e33eff8Schristos 242*8e33eff8Schristos /* Copy the prof data from mutex for processing. */ 243*8e33eff8Schristos static inline void 244*8e33eff8Schristos malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, 245*8e33eff8Schristos malloc_mutex_t *mutex) { 246*8e33eff8Schristos mutex_prof_data_t *source = &mutex->prof_data; 247*8e33eff8Schristos /* Can only read holding the mutex. */ 248*8e33eff8Schristos malloc_mutex_assert_owner(tsdn, mutex); 249*8e33eff8Schristos 250*8e33eff8Schristos /* 251*8e33eff8Schristos * Not *really* allowed (we shouldn't be doing non-atomic loads of 252*8e33eff8Schristos * atomic data), but the mutex protection makes this safe, and writing 253*8e33eff8Schristos * a member-for-member copy is tedious for this situation. 254*8e33eff8Schristos */ 255*8e33eff8Schristos *data = *source; 256*8e33eff8Schristos /* n_wait_thds is not reported (modified w/o locking). */ 257*8e33eff8Schristos atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); 258*8e33eff8Schristos } 259*8e33eff8Schristos 260*8e33eff8Schristos #endif /* JEMALLOC_INTERNAL_MUTEX_H */ 261